In [340]:
import os
import yaml
import subprocess
import sys
import xml.etree.ElementTree as ET
from collections import defaultdict

In [421]:
# Locations are relative to the directory the notebook is executed from.
root_path = '../'
class_path = f"{root_path}src/main/java/"
testclass_path = f"{root_path}src/test/java/"
# root_path already ends with '/', so this value contains a doubled slash.
jacoco_path = f"{root_path}/target/site/jacoco/jacoco.xml"

# When True, generate_project() leaves out placeholder test classes.
filter_empty_test_classes = True
# Switches read by debug_print() and info_print() respectively.
debug = False
info = True

In [403]:
class PitMutator:
    """Per-mutator tally of PIT mutation results.

    All counters start at zero. ``tests_run`` accumulates the number of tests
    executed, ``killed`` / ``survived`` / ``no_coverage`` count mutations seen
    with that status, and ``detected`` / ``undetected`` split every mutation
    by its detected flag.
    """

    # Mutation status -> name of the counter attribute it increments.
    # A status that is not listed here increments no status counter.
    _STATUS_COUNTERS = {
        "KILLED": "killed",
        "SURVIVED": "survived",
        "NO_COVERAGE": "no_coverage",
    }

    def __init__(self, name: str):
        self.name: str = name
        self.tests_run: int = 0
        self.killed: int = 0
        self.survived: int = 0
        self.detected: int = 0
        self.undetected: int = 0
        self.no_coverage: int = 0

    def update_counts(self, status: str, detected: bool, tests_run: int):
        """Fold a single mutation result into the tallies.

        Args:
            status: Mutation status string, e.g. "KILLED".
            detected: Whether the mutation was detected.
            tests_run: Number of tests executed for this mutation.
        """
        self.tests_run += tests_run

        counter_name = self._STATUS_COUNTERS.get(status)
        if counter_name is not None:
            setattr(self, counter_name, getattr(self, counter_name) + 1)

        # Every mutation lands in exactly one of these two buckets,
        # independently of its status.
        if detected:
            self.detected += 1
        else:
            self.undetected += 1

class PitMetrics:
    """PIT results for one run: grand totals plus a per-mutator breakdown."""

    def __init__(self):
        self.total_tests_run: int = 0
        self.total_killed: int = 0
        self.total_survived: int = 0
        self.total_detected: int = 0
        self.total_undetected: int = 0
        self.total_no_coverage: int = 0
        # Kept in insertion order; lookups go through get_mutator().
        self.mutators: list[PitMutator] = []

    def add_mutator(self, mutator: PitMutator):
        """Append *mutator* to the breakdown; no duplicate check is made."""
        self.mutators.append(mutator)

    def get_mutator(self, mutator_name: str):
        """Return the first stored mutator named *mutator_name*, else None."""
        candidates = (m for m in self.mutators if m.name == mutator_name)
        return next(candidates, None)

    def update_totals(self, tests_run: int, killed: int, survived: int, detected: int, undetected: int, no_coverage: int):
        """Add the given counts onto the corresponding running totals."""
        self.total_tests_run += tests_run
        self.total_killed += killed
        self.total_survived += survived
        self.total_detected += detected
        self.total_undetected += undetected
        self.total_no_coverage += no_coverage

class JacocoCounter:
    """One coverage counter: its name plus total/covered/missed and a coverage value.

    The values are stored exactly as passed in; nothing is derived or
    validated here.
    """

    def __init__(self, name: str, total: int, covered: int, missed: int, coverage: float):
        self.name: str = name
        self.total: int = total
        self.covered: int = covered
        self.missed: int = missed
        self.coverage: float = coverage

class JacocoMetrics:
    """Ordered collection of JacocoCounter objects."""

    def __init__(self):
        self.counters: list[JacocoCounter] = []

    def add_counter(self, counter: JacocoCounter):
        """Append *counter* to the end of the collection."""
        self.counters.append(counter)

class TestClass:
    """A Java test class identified by package, llm, prompt and class name.

    ``class_path`` is the dotted form ``<package>.<llm>.<prompt>.<name>`` and
    ``directory_path`` is the matching ``.java`` file below ``testclass_path``.
    """

    def __init__(self, name: str, package: str, llm: str, prompt: str):
        self.name: str = name
        self.package: str = package
        self.llm: str = llm
        self.prompt: str = prompt

        location = (package, llm, prompt)
        self.class_path: str = '.'.join((*location, name))
        self.directory_path: str = os.path.join(testclass_path, *location, f"{name}.java")

        # Coverage results start empty; mutation results stay None until assigned.
        self.jacoco_metrics: JacocoMetrics = JacocoMetrics()
        self.pit_metrics: PitMetrics | None = None

    def __repr__(self):
        return self.class_path
    
class Class:
    """A Java production class identified by its package and class name.

    ``class_path`` is the dotted form ``<package>.<name>`` and
    ``directory_path`` is the location of its ``.java`` source file.
    """

    def __init__(self, name: str, package: str):
        self.name: str = name
        self.package: str = package
        self.class_path: str = '.'.join([package, name])
        # Production sources are discovered under class_path/<package>/ (see
        # generate_project), so the file path is built from class_path and
        # the package, not from the test source tree.
        self.directory_path: str = os.path.join(class_path, package, f"{name}.java")

    def __repr__(self):
        return self.class_path

class Package:
    """A named package holding its production classes and its test classes."""

    def __init__(self, name: str):
        self.name: str = name
        self.classes: list[Class] = []
        self.test_classes: list[TestClass] = []

    def add_class(self, class_obj: Class) -> None:
        """Add a Class object to the package."""
        self.classes.append(class_obj)

    def add_test_class(self, test_class_obj: TestClass) -> None:
        """Add a TestClass object to the package."""
        self.test_classes.append(test_class_obj)

    def get_classes(self, class_name_filter: str | None = None) -> list[Class]:
        """Return the package's classes, optionally only those named *class_name_filter*.

        A filter of None matches every class.
        """
        return [
            c for c in self.classes
            if class_name_filter is None or c.name == class_name_filter
        ]

    def get_test_classes(
        self,
        llm_filter: str | None = None,
        prompt_filter: str | None = None,
        testclass_name_filter: str | None = None,
    ) -> list[TestClass]:
        """Return the test classes that satisfy every supplied filter.

        Each filter is compared for equality against the matching attribute
        (``llm``, ``prompt``, ``name``); a filter left as None matches anything.
        """
        return [
            tc for tc in self.test_classes
            if (llm_filter is None or tc.llm == llm_filter)
            and (prompt_filter is None or tc.prompt == prompt_filter)
            and (testclass_name_filter is None or tc.name == testclass_name_filter)
        ]

    def yaml_dump(self) -> str:
        """Return a YAML representation of the package structure."""
        # Class and TestClass expose class_path as a plain attribute; they
        # define no get_class_path() method.
        package_data = {
            'package': self.name,
            'classes': [cls.class_path for cls in self.classes],
            'test_classes': [
                {
                    'class_path': test_cls.class_path,
                    'llm': test_cls.llm,
                    'prompt': test_cls.prompt
                } for test_cls in self.test_classes
            ]
        }
        return yaml.dump(package_data, sort_keys=False, indent=4)

    def __repr__(self):
        return f"Package(name={self.name}, classes={self.classes}, test_classes={self.test_classes})"
    
class Project:
    """Top-level container: a fixed project name plus a list of packages."""

    def __init__(self):
        self.name = "Quality Software"
        self.packages: list[Package] = []

    def add_package(self, package: Package):
        """Append *package* to the project."""
        self.packages.append(package)

    def get_package(self, package: str) -> Package:
        """Return the first package whose name equals *package*, or None."""
        matching = (p for p in self.packages if p.name == package)
        return next(matching, None)

    def get_classes(self, package_filter=None) -> list[Class]:
        """Return classes from every package, or only from *package_filter*."""
        return [
            cls
            for pkg in self.packages
            if package_filter is None or pkg.name == package_filter
            for cls in pkg.classes
        ]

    def get_test_classes(self, package_filter: str = None, llm_filter: str = None, prompt_filter: str = None) -> list[TestClass]:
        """Return test classes that satisfy every supplied filter.

        A filter left as None matches anything; otherwise the package name,
        ``llm`` and ``prompt`` are compared for equality.
        """
        return [
            tc
            for pkg in self.packages
            if package_filter is None or pkg.name == package_filter
            for tc in pkg.test_classes
            if (llm_filter is None or tc.llm == llm_filter)
            and (prompt_filter is None or tc.prompt == prompt_filter)
        ]

In [None]:
def debug_print(*args, **kwargs):
    """Print like print(), prefixed with "DEBUG:", only while `debug` is truthy."""
    if not debug:
        return
    # The tag goes out in its own call so that caller-supplied print()
    # keyword arguments affect the message only.
    print("DEBUG:", end="")
    print(*args, **kwargs)

def info_print(*args, **kwargs):
    """Print like print(), prefixed with "INFO:", only while `info` is truthy."""
    if not info:
        return
    # The tag goes out in its own call so that caller-supplied print()
    # keyword arguments affect the message only.
    print("INFO:", end="")
    print(*args, **kwargs)

In [359]:
def isEmptyTestClass(test_class: TestClass) -> bool:
    """Return True when the test class source file contains the placeholder marker text."""
    placeholder_marker = "This is a placeholder test class"
    with open(test_class.directory_path, 'r') as source_file:
        source_text = source_file.read()
    return placeholder_marker in source_text

def _list_subdirectories(parent: str) -> list[str]:
    """Return the names of directories directly inside *parent* ([] if *parent* is not a directory)."""
    if not os.path.isdir(parent):
        return []
    return [
        entry for entry in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, entry))
    ]


def generate_project() -> Project:
    """Build a Project by scanning the source trees on disk.

    Expected layout:
        class_path/<package>/<Class>.java
        testclass_path/<package>/<llm>/<prompt>/<TestClass>.java

    Entries that are not directories (stray files at package, llm or prompt
    level) are skipped, and a package without a test directory simply gets no
    test classes. When ``filter_empty_test_classes`` is set, test classes for
    which isEmptyTestClass() is true are left out.

    Returns:
        The populated Project; packages and classes keep os.listdir() order.
    """
    project: Project = Project()
    for package_name in os.listdir(class_path):
        package_dir = os.path.join(class_path, package_name)
        if not os.path.isdir(package_dir):
            debug_print(f"Skipping non-directory entry {package_name}")
            continue
        package = Package(package_name)

        debug_print(f"Looking for classes under package {package_name}")
        for file_name in os.listdir(package_dir):
            if file_name.endswith('.java'):
                class_obj = Class(file_name.removesuffix('.java'), package.name)
                debug_print(f"\tFound class {class_obj}")
                package.add_class(class_obj)

        debug_print(f"Looking for test classes under package {package_name}")
        package_test_dir = os.path.join(testclass_path, package_name)
        for llm in _list_subdirectories(package_test_dir):
            llm_dir = os.path.join(package_test_dir, llm)
            for prompt in _list_subdirectories(llm_dir):
                for file_name in os.listdir(os.path.join(llm_dir, prompt)):
                    if not file_name.endswith('.java'):
                        continue
                    testclass_obj = TestClass(file_name.removesuffix('.java'), package.name, llm, prompt)
                    if filter_empty_test_classes and isEmptyTestClass(testclass_obj):
                        debug_print(f'\tIgnoring empty test class {testclass_obj}')
                    else:
                        debug_print(f"\tFound test class {testclass_obj}")
                        package.add_test_class(testclass_obj)
        project.add_package(package)
    return project
        

In [360]:
# Turn on debug output so discovery is traced, then build the project model
# from the source trees on disk.
debug = True
project = generate_project()

DEBUG:Looking for classes under package anagrams
DEBUG:	Found class anagrams.Anagrams
DEBUG:Looking for test classes under package anagrams
DEBUG:	Found test class anagrams.metaAI.prompt0.AnagramsTest
DEBUG:	Ignoring empty test class anagrams.ai21Jamba.prompt1.anagramsai21JambaTest
DEBUG:	Ignoring empty test class anagrams.ai21Jamba.prompt3.anagramsai21JambaTest
DEBUG:	Ignoring empty test class anagrams.ai21Jamba.prompt2.anagramsai21JambaTest
DEBUG:	Found test class anagrams.ai21Jamba.prompt0.AnagramsTest
DEBUG:	Ignoring empty test class anagrams.ai21Jamba.prompt0.anagramsai21JambaTest
DEBUG:	Ignoring empty test class anagrams.ai21Jamba.prompt4.anagramsai21JambaTest
DEBUG:	Ignoring empty test class anagrams.chatGPT4o.prompt1.anagramschatGPT4oTest
DEBUG:	Ignoring empty test class anagrams.chatGPT4o.prompt3.anagramschatGPT4oTest
DEBUG:	Ignoring empty test class anagrams.chatGPT4o.prompt2.anagramschatGPT4oTest
DEBUG:	Found test class anagrams.chatGPT4o.prompt0.AnagramsChatGPT4o
DEBUG:	Ign

In [361]:
# Sanity check: list the 'prompt0' test classes discovered in the 'anagrams' package.
project.get_test_classes(package_filter='anagrams', prompt_filter='prompt0')

[anagrams.metaAI.prompt0.AnagramsTest,
 anagrams.ai21Jamba.prompt0.AnagramsTest,
 anagrams.chatGPT4o.prompt0.AnagramsChatGPT4o,
 anagrams.chatGPTo1Preview.prompt0.anagramschatGPTo1PreviewTest,
 anagrams.claudeSonnet.prompt0.AnagramsTestClaude35Sonnet]

In [362]:
def run_maven_clean():
    """Clean the project to remove any existing compiled classes and any existing reports.

    Runs ``mvn clean`` in ``root_path``. On a non-zero exit status the captured
    Maven output is printed and the interpreter exits with status 1.
    """
    debug_print("Cleaning project...")
    result = subprocess.run(["mvn", "clean"], capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error cleaning project")
        # Show stdout as well as stderr, as run_maven_test_compile does:
        # Maven's build log goes to stdout, so stderr alone is often empty.
        print(result.stdout)
        print(result.stderr)
        sys.exit(1)
    debug_print("Clean successful.")
def run_maven_compile():
    """Compile all classes in the project.

    Runs ``mvn compile`` in ``root_path``. On a non-zero exit status the
    captured Maven output is printed and the interpreter exits with status 1.
    """
    debug_print("Compiling project...")
    result = subprocess.run(["mvn", "compile"], capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error during compilation:")
        # Show stdout as well as stderr, as run_maven_test_compile does:
        # Maven's build log goes to stdout, so stderr alone is often empty.
        print(result.stdout)
        print(result.stderr)
        sys.exit(1)
    debug_print("Compilation successful.")
def run_maven_test_compile():
    """Compile all test classes in the project.

    Runs ``mvn test-compile`` in ``root_path``. On a non-zero exit status the
    captured stdout and stderr are printed and the interpreter exits with
    status 1.
    """
    debug_print("Compiling project test classes...")
    completed = subprocess.run(
        ["mvn", "test-compile"],
        capture_output=True,
        text=True,
        cwd=root_path,
    )
    if completed.returncode == 0:
        debug_print("Test compilation successful.")
        return

    print("Error during test compilation:")
    print(completed.stdout)
    print(completed.stderr)
    sys.exit(1)


In [409]:
def run_pit_for_single_test_class(target_class: Class, test_class: TestClass, mutators: str='ALL', ignore_failing_tests: bool=True, verbose: bool=False):
    """Run PIT mutation testing for a specific target class and test class.

    Invokes the ``org.pitest:pitest-maven:mutationCoverage`` goal in
    ``root_path`` and prints Maven's stdout when the command succeeds.

    Args:
        target_class: Class to mutate; its ``class_path`` is passed as ``-DtargetClasses``.
        test_class: Test class to run; its ``class_path`` is passed as ``-DtargetTests``.
        mutators: Value passed as ``-Dmutators``.
        ignore_failing_tests: When true, adds ``-DignoreFailingTests=true``.
        verbose: When true, adds ``-Dverbose=true``.

    On a non-zero exit status the captured Maven output is printed and the
    interpreter exits with status 1.
    """
    debug_print(f"Running PIT for class '{target_class}' with test '{test_class}'...")

    # Construct the PIT command
    pit_command = [
        "mvn", "org.pitest:pitest-maven:mutationCoverage",
        f"-DtargetClasses={target_class.class_path}",
        f"-DtargetTests={test_class.class_path}",
        f"-Dmutators={mutators}"
    ]
    if ignore_failing_tests:
        pit_command.append("-DignoreFailingTests=true")
    if verbose:
        pit_command.append("-Dverbose=true")
    # Single quotes inside the f-string: reusing the enclosing quote character
    # is a syntax error on interpreters older than Python 3.12.
    debug_print(f"Running command: {' '.join(pit_command)}")

    # Execute the PIT command
    result = subprocess.run(pit_command, capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error during PIT execution:")
        # Maven's build log goes to stdout, so show it alongside stderr.
        print(result.stdout)
        print(result.stderr)
        sys.exit(1)

    # Print PIT output
    print(result.stdout)
    # TODO: parse output to ensure test was successful
    debug_print("PIT testing succeeded?")

In [364]:
def run_jacoco_for_single_test_class(test_class: TestClass):
    """Execute one test class through ``mvn test`` so coverage can be collected.

    Runs ``mvn test -Dtest=<test_class.class_path>`` in ``root_path``. If
    Maven exits with a non-zero status, or its output reports that zero tests
    ran, an error and the captured output are printed and the function
    returns early. Always returns None.

    Args:
        test_class: The test class to execute.
    """
    maven_command = ["mvn", "test", f"-Dtest={test_class.class_path}"]
    debug_print(f"Running test suite with jacoco coverage for {test_class}...")
    result = subprocess.run(maven_command, capture_output=True, text=True, cwd=root_path)
    # subprocess.run() raises CalledProcessError only when check=True, so a
    # failed build has to be detected from the return code.
    if result.returncode != 0:
        print(f"An error occurred while running tests for {test_class}")
        print(result.stdout)
        print(result.stderr)
        return
    # TODO: Parse results to verify that the test run actually ran tests and generated a coverage report
    # Check if any tests were actually run
    if "Tests run: 0" in result.stdout:
        print(f"Error: Test class {test_class} failed to run any tests.")
        print(result.stdout)
        return
    debug_print(result.stdout)
    debug_print("Test run succeeded?")

In [None]:
def parse_jacoco(jacoco_path, target_package, target_class) -> JacocoMetrics | None:
    """Extract the coverage counters of one class from a JaCoCo XML report.

    Args:
        jacoco_path: Path to the ``jacoco.xml`` report.
        target_package: Package name, compared for equality with the ``name``
            attribute of the report's ``package`` elements.
        target_class: Class name; matches a report class whose dotted name
            equals it or ends with ``.<target_class>``.

    Returns:
        A JacocoMetrics holding one JacocoCounter per ``counter`` element of
        the matched class, or None (after printing an error) when the report
        is missing or invalid, or the package or class is not in it.
    """
    try:
        tree = ET.parse(jacoco_path)
    except ET.ParseError:
        print("Error: Could not parse jacoco.xml. Check if the file is valid XML.")
        return None
    except FileNotFoundError:
        print(f"Error: File {jacoco_path} not found. Ensure the file path is correct.")
        return None
    root = tree.getroot()

    # Find the package matching the target package. Without an explicit
    # "not found" check the code would carry on with an unrelated package.
    package = next(
        (p for p in root.findall('package') if p.get('name') == target_package),
        None,
    )
    if package is None:
        print(f"Error: Package {target_package} not found in {jacoco_path}.")
        return None
    debug_print(f"Package: {target_package}")

    # Find the class matching the target class. Requiring a '.' boundary
    # stops "Trityp" from matching a class such as "MyTrityp".
    cls = None
    class_name = None
    for candidate in package.findall('class'):
        candidate_name = candidate.get('name', '').replace('/', '.')
        if candidate_name == target_class or candidate_name.endswith('.' + target_class):
            cls, class_name = candidate, candidate_name
            break
    if cls is None:
        print(f"Error: Class {target_class} not found in package {target_package}.")
        return None
    debug_print(f"\tClass: {class_name}")

    # Extract coverage metrics
    jacoco_metrics = JacocoMetrics()
    for counter in cls.findall('counter'):
        counter_type = counter.get('type')
        missed = int(counter.get('missed'))
        covered = int(counter.get('covered'))
        total = missed + covered
        # A counter with nothing to cover is reported as 0% rather than dividing by zero.
        coverage = (covered / total * 100) if total > 0 else 0.0
        jacoco_counter = JacocoCounter(counter_type, total, covered, missed, coverage)
        debug_print(f'\tCounter Type: {counter_type}, missed: {missed}, covered: {covered}, total: {total}, coverage: {coverage:.2f}')
        jacoco_metrics.add_counter(jacoco_counter)

    return jacoco_metrics

In [366]:
# Start from a clean build, compile the tests, then run a single test class
# through Maven so a coverage report can be produced.
run_maven_clean()
run_maven_test_compile()
# Re-assigns the same report location that is set in the configuration cell.
jacoco_path = root_path + '/target/site/jacoco/jacoco.xml'
test_classes = project.get_test_classes(package_filter="trityp", llm_filter="metaAI", prompt_filter="prompt0")
# Takes the first match; raises IndexError if the filters matched nothing.
test_class = test_classes[0]
run_jacoco_for_single_test_class(test_class)


DEBUG:Cleaning project...
DEBUG:Clean successful.
DEBUG:Compiling project test classes...
DEBUG:Test compilation successful.
DEBUG:Running test suite with jacoco coverage for trityp.metaAI.prompt0.TritypTest...
DEBUG:[INFO] Scanning for projects...
[INFO] 
[INFO] --------------------< com.example:my-maven-project >--------------------
[INFO] Building my-maven-project 1.0-SNAPSHOT
[INFO]   from pom.xml
[INFO] --------------------------------[ jar ]---------------------------------
[INFO] 
[INFO] --- jacoco:0.8.8:prepare-agent (default) @ my-maven-project ---
[INFO] argLine set to -javaagent:/home/chris/.m2/repository/org/jacoco/org.jacoco.agent/0.8.8/org.jacoco.agent-0.8.8-runtime.jar=destfile=/home/chris/Documents/UT/SWTest/project/QualitySoftware/target/jacoco.exec
[INFO] 
[INFO] --- resources:3.3.1:resources (default-resources) @ my-maven-project ---
[INFO] skip non existing resourceDirectory /home/chris/Documents/UT/SWTest/project/QualitySoftware/src/main/resources
[INFO] 
[INFO] --

In [351]:
# Attach the parsed coverage of trityp.Trityp to the test class that was just run.
# parse_jacoco() can return None, which would replace the default JacocoMetrics.
test_class.jacoco_metrics = parse_jacoco(jacoco_path, 'trityp', 'Trityp')


DEBUG:Package: trityp
DEBUG:	Class: trityp.Trityp
DEBUG:	Counter Type: INSTRUCTION, missed: 5, covered: 78, total: 83, coverage: 93.98
DEBUG:	Counter Type: BRANCH, missed: 3, covered: 31, total: 34, coverage: 91.18
DEBUG:	Counter Type: LINE, missed: 2, covered: 21, total: 23, coverage: 91.30
DEBUG:	Counter Type: COMPLEXITY, missed: 4, covered: 15, total: 19, coverage: 78.95
DEBUG:	Counter Type: METHOD, missed: 1, covered: 1, total: 2, coverage: 50.00
DEBUG:	Counter Type: CLASS, missed: 0, covered: 1, total: 1, coverage: 100.00


In [352]:
def print_jacoco_metrics(jacoco_metrics: JacocoMetrics):
    """Print one comma-separated summary line for each counter in *jacoco_metrics*."""
    for entry in jacoco_metrics.counters:
        fields = (
            f"name: {entry.name}",
            f"total: {entry.total}",
            f"covered: {entry.covered}",
            f"missed: {entry.missed}",
            f"coverage: {entry.coverage:.2f}%",
        )
        print(", ".join(fields))

In [353]:
# Show the coverage counters parsed for the selected test class.
print_jacoco_metrics(test_class.jacoco_metrics)

name: INSTRUCTION, total: 83, covered: 78, missed: 5, coverage: 93.98%
name: BRANCH, total: 34, covered: 31, missed: 3, coverage: 91.18%
name: LINE, total: 23, covered: 21, missed: 2, coverage: 91.30%
name: COMPLEXITY, total: 19, covered: 15, missed: 4, coverage: 78.95%
name: METHOD, total: 2, covered: 1, missed: 1, coverage: 50.00%
name: CLASS, total: 1, covered: 1, missed: 0, coverage: 100.00%


In [410]:
# Rebuild everything from scratch, then run PIT for trityp.Trityp against
# the ai21Jamba/prompt0 test class.
debug=True
run_maven_clean()
run_maven_compile()
run_maven_test_compile()
# Both lookups take the first match and raise IndexError when nothing matched.
Trityp = project.get_classes(package_filter='trityp')[0]
TritypTest_ai21Jamba_prompt0 = project.get_test_classes(package_filter='trityp', llm_filter='ai21Jamba', prompt_filter='prompt0')[0]
run_pit_for_single_test_class(target_class=Trityp, test_class=TritypTest_ai21Jamba_prompt0)

DEBUG:Cleaning project...
DEBUG:Clean successful.
DEBUG:Compiling project...
DEBUG:Compilation successful.
DEBUG:Compiling project test classes...
DEBUG:Test compilation successful.
DEBUG:Running PIT for class 'trityp.Trityp' with test 'trityp.ai21Jamba.prompt0.TritypTest'...
DEBUG:Running command: mvn org.pitest:pitest-maven:mutationCoverage -DtargetClasses=trityp.Trityp -DtargetTests=trityp.ai21Jamba.prompt0.TritypTest -Dmutators=ALL -DignoreFailingTests=true
[INFO] Scanning for projects...
[INFO] 
[INFO] --------------------< com.example:my-maven-project >--------------------
[INFO] Building my-maven-project 1.0-SNAPSHOT
[INFO]   from pom.xml
[INFO] --------------------------------[ jar ]---------------------------------
[INFO] 
[INFO] --- pitest:1.6.7:mutationCoverage (default-cli) @ my-maven-project ---
[INFO] Found plugin : Default csv report plugin
[INFO] Found plugin : Default xml report plugin
[INFO] Found plugin : Default html report plugin
[INFO] Found plugin : Static initia

In [404]:
def parse_pit_report(report_path) -> PitMetrics:
    """Read a PIT ``mutations.xml`` report into a PitMetrics object.

    Each ``mutation`` element is tallied under the mutator named by its
    ``mutator`` child ("Unknown" when absent). Once all mutations are read,
    the per-mutator counts are summed into the totals.

    Args:
        report_path: Path of the XML report to parse.

    Returns:
        The populated PitMetrics.
    """
    document_root = ET.parse(report_path).getroot()
    metrics = PitMetrics()

    for node in document_root.findall("mutation"):
        # Missing attributes fall back to "not detected", "UNKNOWN" and 0 tests.
        was_detected = node.get("detected", "false") == "true"
        outcome = node.get("status", "UNKNOWN")
        tests_executed = int(node.get("numberOfTestsRun", "0"))
        name = node.findtext("mutator", "Unknown")

        # Reuse the mutator's existing entry, creating it on first sight.
        entry = metrics.get_mutator(name)
        if not entry:
            entry = PitMutator(name)
            metrics.add_mutator(entry)
        entry.update_counts(outcome, was_detected, tests_executed)

    # Roll the per-mutator tallies up into the grand totals.
    for entry in metrics.mutators:
        metrics.update_totals(
            entry.tests_run,
            entry.killed,
            entry.survived,
            entry.detected,
            entry.undetected,
            entry.no_coverage,
        )

    return metrics


In [411]:
pit_dir = root_path + '/target/pit-reports/'
# Exactly one report entry is expected in the PIT directory. Sorting makes
# the choice deterministic when there are several, since os.listdir() returns
# entries in arbitrary order.
pit_reports = sorted(os.listdir(pit_dir))
if not pit_reports:
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise FileNotFoundError(f"No PIT reports found in {pit_dir}; run PIT before parsing.")
if len(pit_reports) != 1:
    print("Error processing PIT reports, there is an incorrect number of reports in the PIT dir")
    print("Pit reports: ", pit_reports)
# NOTE(review): entries are presumably timestamp-named, which would make the
# last one in sorted order the most recent run - confirm.
pit_report_timestamp = pit_reports[-1]
pit_report_path = pit_dir + pit_report_timestamp + '/mutations.xml'
pit_metrics = parse_pit_report(pit_report_path)
    

In [412]:
def print_pit_metrics(pit_metrics: PitMetrics):
    """Print the overall PIT counts, then the same counts for every mutator."""
    counts_line = (
        "\tTests Run: {}, Killed: {}, Survived: {}, "
        "Detected: {}, Undetected: {}, No Coverage: {}"
    )

    totals = counts_line.format(
        pit_metrics.total_tests_run,
        pit_metrics.total_killed,
        pit_metrics.total_survived,
        pit_metrics.total_detected,
        pit_metrics.total_undetected,
        pit_metrics.total_no_coverage,
    )
    print("Total\n" + totals)

    for entry in pit_metrics.mutators:
        details = counts_line.format(
            entry.tests_run,
            entry.killed,
            entry.survived,
            entry.detected,
            entry.undetected,
            entry.no_coverage,
        )
        # The heading line ends with ", " before the line break.
        print(f"Mutator: {entry.name}, \n" + details)

In [418]:
def print_total_pit_metrics(pit_metrics: PitMetrics):
    """Print only the overall PIT counts under a "PIT Coverage:" heading."""
    summary = ", ".join((
        f"Tests Run: {pit_metrics.total_tests_run}",
        f"Killed: {pit_metrics.total_killed}",
        f"Survived: {pit_metrics.total_survived}",
        f"Detected: {pit_metrics.total_detected}",
        f"Undetected: {pit_metrics.total_undetected}",
        f"No Coverage: {pit_metrics.total_no_coverage}",
    ))
    print("PIT Coverage:\n\t" + summary)

In [419]:
# Show the overall mutation-testing counts for the report parsed above.
print_total_pit_metrics(pit_metrics)

PIT Coverage:
	Tests Run: 417, Killed: 295, Survived: 122, Detected: 295, Undetected: 162, No Coverage: 40


In [None]:
def run_coverage_reports_for_single_test_class(package: Package, target_class: Class, test_class: TestClass):
    """Clean the project, then compile its classes and its test classes.

    NOTE(review): `package`, `target_class` and `test_class` are not used by
    the body yet; this looks like an unfinished stub - confirm the intended
    coverage steps.
    """
    # Order matters: clean first, then main sources, then test sources.
    run_maven_clean()
    run_maven_compile()
    run_maven_test_compile()
# These statements are at module level, so they run when the cell executes
# rather than as part of the function defined just before them.
# NOTE(review): possibly meant to be inside that function - confirm.
test_classes = project.get_test_classes(package_filter="trityp", llm_filter="metaAI", prompt_filter="prompt0")
# Takes the first match; raises IndexError if the filters matched nothing.
test_class = test_classes[0]
run_jacoco_for_single_test_class(test_class)