In [3]:
import os
import yaml
import subprocess
import sys
import xml.etree.ElementTree as ET
from collections import defaultdict

In [2]:
root_path = '../'
class_path = '../src/main/java/'
testclass_path = '../src/test/java/'
jacoco_path = '../target/site/jacoco/jacoco.xml'
pit_dir = '../target/pit-reports/'
filter_empty_test_classes = True
debug = False
info = True

In [None]:
import json

class PitMutator:
    def __init__(self, name: str):
        self.name: str = name
        self.tests_run: int = 0
        self.killed: int = 0
        self.survived: int = 0
        self.detected: int = 0
        self.undetected: int = 0
        self.no_coverage: int = 0

    def update_counts(self, status: str, detected: bool, tests_run: int):
        self.tests_run += tests_run
        if status == "KILLED":
            self.killed += 1
        elif status == "SURVIVED":
            self.survived += 1
        elif status == "NO_COVERAGE":
            self.no_coverage += 1
        if detected:
            self.detected += 1
        else:
            self.undetected += 1

    def json_dump(self):
        return {
            "name": self.name,
            "tests_run": self.tests_run,
            "killed": self.killed,
            "survived": self.survived,
            "detected": self.detected,
            "undetected": self.undetected,
            "no_coverage": self.no_coverage
        }

class PitMetrics:
    def __init__(self):
        self.total_tests_run: int = 0
        self.total_killed: int = 0
        self.total_survived: int = 0
        self.total_detected: int = 0
        self.total_undetected: int = 0
        self.total_no_coverage: int = 0
        self.mutators: list[PitMutator] = []

    def add_mutator(self, mutator: PitMutator):
        self.mutators.append(mutator)

    def get_mutator(self, name):
        for mutator in self.mutators:
            if (mutator.name == name):
                return mutator
        return

    def update_totals(self, tests_run: int, killed: int, survived: int, detected: int, undetected: int, no_coverage: int):
        self.total_tests_run += tests_run
        self.total_killed += killed
        self.total_survived += survived
        self.total_detected += detected
        self.total_undetected += undetected
        self.total_no_coverage += no_coverage

    def json_dump(self):
        return {
            "total_tests_run": self.total_tests_run,
            "total_killed": self.total_killed,
            "total_survived": self.total_survived,
            "total_detected": self.total_detected,
            "total_undetected": self.total_undetected,
            "total_no_coverage": self.total_no_coverage,
            "mutators": [mutator.json_dump() for mutator in self.mutators]
        }

class JacocoCounter:
    def __init__(self, name: str, total: int, covered: int, missed: int, coverage: float):
        self.name : str = name
        self.total : int = total
        self.covered : int = covered
        self.missed : int = missed
        self.coverage : float = coverage

    def json_dump(self):
        return {
            "name": self.name,
            "total": self.total,
            "covered": self.covered,
            "missed": self.missed,
            "coverage": self.coverage
        }

class JacocoMetrics:
    def __init__(self):
        self.counters: list[JacocoCounter] = []

    def add_counter(self, counter: JacocoCounter):
        self.counters.append(counter)

    def json_dump(self):
        return {
            "counters": [counter.json_dump() for counter in self.counters]
        }

class TestClass:
    def __init__(self, name: str, package: str, llm: str, prompt: str):
        self.name : str = name
        self.package: str = package
        self.llm: str = llm
        self.prompt: str = prompt
        self.class_path: str = '.'.join([package, llm, prompt, name])
        self.directory_path: str = os.path.join('../src/test/java', package, llm, prompt, f"{name}.java")
        self.jacoco_metrics: JacocoMetrics = JacocoMetrics()
        self.pit_metrics: PitMetrics = PitMetrics()

    def json_dump(self):
        return {
            "name": self.name,
            "package": self.package,
            "llm": self.llm,
            "prompt": self.prompt,
            "class_path": self.class_path,
            "directory_path": self.directory_path,
            "jacoco_metrics": self.jacoco_metrics.json_dump(),
            "pit_metrics": self.pit_metrics.json_dump()
        }
    
class Class:
    def __init__(self, name:str, package:str):
        self.name:str = name
        self.package:str = package
        self.class_path:str = '.'.join([package, name])
        self.directory_path:str = os.path.join(testclass_path, f"{name}.java")

    def __repr__(self):
        return self.class_path

class Package:
    def __init__(self, name: str):
        self.name: str = name
        self.classes: list[Class] = []
        self.test_classes: list[TestClass] = []

    def add_class(self, class_obj: Class) -> None:
        """Add a Class object to the package"""
        self.classes.append(class_obj)

    def add_test_class(self, test_class_obj: TestClass) -> None:
        """Add a TestClass object to the package"""
        self.test_classes.append(test_class_obj)

    def get_classes(self, class_name_filter:str=None) -> list[Class]:
        classes:list[Class] = []
        for c in self.classes:
            if (class_name_filter is None or c.name == class_name_filter):
                classes.append(c)
        return classes

    def get_test_classes(self, llm_filter:str=None, prompt_filter:str=None, testclass_name_filter:str=None) -> list[TestClass]:
        """Retrieve a TestClass by name"""
        test_classes:list[TestClass] = []
        for tc in self.test_classes:
            if (llm_filter is None or tc.llm == llm_filter):
                if (prompt_filter is None or tc.prompt == prompt_filter):
                    if (testclass_name_filter is None or tc.name == testclass_name_filter):
                        test_classes.append(tc)
        return test_classes
    
    def yaml_dump(self) -> str:
        """Return a YAML-like representation of the package structure"""
        package_data = {
            'package': self.name,
            'classes': [cls.get_class_path() for cls in self.classes],
            'test_classes': [
                {
                    'class_path': test_cls.get_class_path(),
                    'llm': test_cls.llm,
                    'prompt': test_cls.prompt
                } for test_cls in self.test_classes
            ]
        }
        return yaml.dump(package_data, sort_keys=False, indent=4)

    def __repr__(self):
        return f"Package(name={self.name}, classes={self.classes}, test_classes={self.test_classes})"
    
class Project:
    def __init__(self):
        self.name = "Quality Software"
        self.packages: list[Package] = []
    
    def add_package(self, package:Package):
        self.packages.append(package)

    def get_package(self, package: str) -> Package:
        for p in self.packages:
            if (p.name == package):
                return p
        return None
        
    def get_classes(self, package_filter=None) -> list[Class]:
        classes: list[Class] = []
        for package in self.packages:
            if (package_filter is None or package.name == package_filter):
                for c in package.classes:
                    classes.append(c)
        return classes
    
    def get_test_classes(self, package_filter:str=None, llm_filter:str=None, prompt_filter:str=None) -> list[TestClass]:
        test_classes: list[TestClass] = []
        for package in self.packages:
            if (package_filter is None or package.name == package_filter):
                for test_class in package.test_classes:
                    if ((llm_filter is None or test_class.llm == llm_filter)
                        and (prompt_filter is None or test_class.prompt == prompt_filter)):
                        test_classes.append(test_class)
        return test_classes

In [4]:
def debug_print(*args, **kwargs):
    if debug:
        print("DEBUG:", end="")
        print(*args, **kwargs)

def info_print(*args, **kwargs):
    if info:
        print("INFO:", end="")
        print(*args, **kwargs)

In [5]:
def isEmptyTestClass(test_class: TestClass) -> bool:
    dummy_string = "This is a placeholder test class"
    with open(test_class.directory_path, 'r') as f:
        if dummy_string in f.read():
            return True
    return False

def generate_project() -> Project:
    project:Project = Project()
    packages = os.listdir(class_path)
    for p in packages:
        package = Package(p)

        debug_print(f"Looking for classes under package {p}")
        classes = os.listdir(class_path + p)
        for c in classes:
            if c.endswith('.java'):
                class_name = c.split('.java')[0]
                class_obj = Class(class_name, package.name)
                debug_print(f"\tFound class {class_obj}")
                package.add_class(class_obj)
        
        debug_print(f"Looking for test classes under package {p}")
        llms = os.listdir(testclass_path + p)
        for llm in llms:
            prompts = os.listdir(f"{testclass_path}{p}/{llm}/")
            for prompt in prompts:
                testclasses = os.listdir(f'{testclass_path}{p}/{llm}/{prompt}/')
                for tc in testclasses:
                    if tc.endswith('.java'):
                        testclass_name = tc.split('.java')[0]
                        testclass_obj = TestClass(testclass_name, package.name, llm, prompt)
                        if (not (filter_empty_test_classes and isEmptyTestClass(testclass_obj))):                    
                            debug_print(f"\tFound test class {testclass_obj}")
                            package.add_test_class(testclass_obj)
                        else:
                            debug_print(f'\tIgnoring empty test class {testclass_obj}')
        project.add_package(package)
    return project
        

NameError: name 'Project' is not defined

In [6]:
debug = True
project = generate_project()

NameError: name 'generate_project' is not defined

In [81]:
def run_maven_clean():
    """Clean the project to remove any existing compiled classes and any existing reports."""
    info_print("Cleaning project...")
    result = subprocess.run(["mvn", "clean"], capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error cleaning project")
        print(result.stderr)
        sys.exit(1)
    info_print("Clean successful.")
def run_maven_compile():
    """Compile all classes in the project"""
    info_print("Compiling project...")
    result = subprocess.run(["mvn", "compile"], capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error during compilation:")
        print(result.stderr)
        sys.exit(1)
    info_print("Compilation successful.")
def run_maven_test_compile():
    """Compile all test classes in the project"""
    info_print("Compiling project test classes...")
    result = subprocess.run(["mvn", "test-compile"], capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error during test compilation:")
        print(result.stdout)
        print(result.stderr)
        sys.exit(1)
    info_print("Test compilation successful.")


In [128]:
def run_pit_for_single_test_class(target_class: Class, test_class: TestClass, mutators: str='ALL', ignore_failing_tests: bool=True, verbose: bool=False):
    """Run PIT mutation testing for a specific target class and test class."""
    info_print(f"Running PIT for class '{target_class}' with test '{test_class}'...")
    
    # Construct the PIT command
    pit_command = [
        "mvn", "org.pitest:pitest-maven:mutationCoverage",
        f"-DtargetClasses={target_class.class_path}",
        f"-DtargetTests={test_class.class_path}",
        f"-Dmutators={mutators}"    
    ]
    if (ignore_failing_tests):
        print("Ignoring failures")
        pit_command.append("-Dmaven.test.failure.ignore=true")
    if (verbose):
        pit_command.append(f"-Dverbose=true")
    debug_print(f"Running command: {" ".join(pit_command)}")
    # Execute the PIT command
    result = subprocess.run(pit_command, capture_output=True, text=True, cwd=root_path)
    if result.returncode != 0:
        print("Error during PIT execution:")
        print(result.stderr)
        sys.exit(1)
    
    # Print PIT output
    debug_print(result.stdout)
    # TODO: parse output to ensure test was successful
    info_print("PIT testing succeeded?")

In [83]:
def run_jacoco_for_single_test_class(test_class: TestClass):
    """Execute tests with jacoco coverage"""
    maven_command = ["mvn", "test", f"-Dtest={test_class.class_path}"]
    info_print(f"Running test suite with jacoco coverage for {test_class}...")
    try:
        result = subprocess.run(maven_command, capture_output=True, text=True, cwd=root_path)
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while running tests for {test_class}")
        print(e.stdout)
        print(e.stderr)
        return
    # TODO: Parse results to verify that the test run actually ran tests and generated a coverage report
    # Check if any tests were actually run
    if ("Tests run: 0" in result.stdout):
        print(f"Error: Test class {test_class} failed to run any tests.")
        print(result.stdout)
        return
    debug_print(result.stdout)
    info_print("Test run succeeded?")

In [84]:
def parse_jacoco(jacoco_path, target_package, target_class) -> JacocoMetrics | None:
    try:
        tree = ET.parse(jacoco_path)
        root = tree.getroot()
        jacoco_metrics = JacocoMetrics()
        # Find the package matching the target package
        for package in root.findall('package'):
            package_name = package.get('name')
            if package_name == target_package:
                break
        # TODO: catch target package not found
        debug_print(f"Package: {package_name}")
            
        # Find the class matching the target class
        for cls in package.findall('class'):
            class_name = cls.get('name').replace('/', '.')
            if class_name.endswith(target_class):
                break
        # TODO: catch target class not found
        debug_print(f"\tClass: {class_name}")

        # Extract coverage metrics
        for counter in cls.findall('counter'):
            counter_type = counter.get('type')
            missed = int(counter.get('missed'))
            covered = int(counter.get('covered'))
            total = missed + covered
            coverage = (covered / total * 100) if total > 0 else 0
            jacoco_counter = JacocoCounter(counter_type, total, covered, missed, coverage)
            debug_print(f'\tCounter Type: {counter_type}, missed: {missed}, covered: {covered}, total: {total}, coverage: {coverage:.2f}')
            jacoco_metrics.add_counter(jacoco_counter)

        return jacoco_metrics

    except ET.ParseError:
        print("Error: Could not parse jacoco.xml. Check if the file is valid XML.")
    except FileNotFoundError:
        print(f"Error: File {jacoco_path} not found. Ensure the file path is correct.")

In [88]:
def print_jacoco_metrics(jacoco_metrics: JacocoMetrics):
    for counter in jacoco_metrics.counters:
        print(f"name: {counter.name}, total: {counter.total}, covered: {counter.covered}, missed: {counter.missed}, coverage: {counter.coverage:.2f}%")

In [1]:
debug=True
run_maven_clean()
run_maven_compile()
run_maven_test_compile()
Trityp = project.get_classes(package_filter='trityp')[0]
TritypTest_ai21Jamba_prompt0 = project.get_test_classes(package_filter='trityp', llm_filter='ai21Jamba', prompt_filter='prompt0')[0]
run_pit_for_single_test_class(target_class=Trityp, test_class=TritypTest_ai21Jamba_prompt0)

NameError: name 'run_maven_clean' is not defined

In [93]:
def parse_pit_report(report_path) -> PitMetrics:
    # Parse the XML file
    tree = ET.parse(report_path)
    root = tree.getroot()

    # Initialize the main metrics object
    pit_metrics = PitMetrics()

    for mutation in root.findall("mutation"):
        # Get mutation attributes
        detected = mutation.get("detected", "false") == "true"
        status = mutation.get("status", "UNKNOWN")
        number_of_tests_run = int(mutation.get("numberOfTestsRun", "0"))
        mutator_name = mutation.findtext("mutator", "Unknown")

        # Create or update the mutator entry
        if not (mutator := pit_metrics.get_mutator(mutator_name)):
            mutator = PitMutator(mutator_name)
            pit_metrics.add_mutator(mutator)
        mutator.update_counts(status, detected, number_of_tests_run)

    # Aggregate results into PitMetrics and update totals
    for mutator in pit_metrics.mutators:
        pit_metrics.update_totals(
            tests_run=mutator.tests_run,
            killed=mutator.killed,
            survived=mutator.survived,
            detected=mutator.detected,
            undetected=mutator.undetected,
            no_coverage=mutator.no_coverage
        )

    return pit_metrics


In [95]:
def print_pit_metrics(pit_metrics: PitMetrics):
    print(
f"""Total
\tTests Run: {pit_metrics.total_tests_run}, Killed: {pit_metrics.total_killed}, Survived: {pit_metrics.total_survived}, Detected: {pit_metrics.total_detected}, Undetected: {pit_metrics.total_undetected}, No Coverage: {pit_metrics.total_no_coverage}"""
    )
    for mutator in pit_metrics.mutators:
        print(
f"""Mutator: {mutator.name}, 
\tTests Run: {mutator.tests_run}, Killed: {mutator.killed}, Survived: {mutator.survived}, Detected: {mutator.detected}, Undetected: {mutator.undetected}, No Coverage: {mutator.no_coverage}"""
        )

In [96]:
def print_total_pit_metrics(pit_metrics: PitMetrics):
    print(
f"""PIT Coverage:
\tTests Run: {pit_metrics.total_tests_run}, Killed: {pit_metrics.total_killed}, Survived: {pit_metrics.total_survived}, Detected: {pit_metrics.total_detected}, Undetected: {pit_metrics.total_undetected}, No Coverage: {pit_metrics.total_no_coverage}"""
    )

In [98]:
def get_pit_report_path():
    pit_dir = '../target/pit-reports/'
    pit_reports = os.listdir(pit_dir)
    if (len(pit_reports) != 1):
        print("Error processing PIT reports, there is an incorrect number of reports in the PIT dir")
        print("Pit reports: ", pit_reports)
    pit_report_timestamp = pit_reports[0]
    pit_report_path = pit_dir + pit_report_timestamp + '/mutations.xml'
    return pit_report_path


In [131]:
def run_coverage_reports_for_single_test_class(package: Package, target_class: Class, test_class: TestClass):
    # run_maven_clean()
    # run_maven_compile()
    # run_maven_test_compile()
    run_jacoco_for_single_test_class(test_class)
    test_class.jacoco_metrics = parse_jacoco(jacoco_path, package.name, target_class.name)
    # print_jacoco_metrics(test_class.jacoco_metrics)
    # run_pit_for_single_test_class(target_class, test_class)
    # test_class.pit_metrics = parse_pit_report(get_pit_report_path())
    # print_total_pit_metrics(test_class.pit_metrics)

In [132]:
package = project.get_package('trityp')
target_class = package.get_classes()[0]
target_test_class = package.get_test_classes(llm_filter="ai21Jamba")[0]
run_coverage_reports_for_single_test_class(package, target_class, test_class)

In [142]:
all_test_classes = project.get_test_classes(llm_filter="chatGPT4o")

In [143]:
debug=False
info=False

In [144]:
for test_class in all_test_classes:
    print(test_class.class_path)

anagrams.chatGPT4o.prompt1.AnagramsTest
anagrams.chatGPT4o.prompt0.AnagramsChatGPT4o
binaryTree.chatGPT4o.prompt0.binaryTreechatGPT4oTest
orangesRotting.chatGPT4o.prompt0.OrangesRotTest
numIslands.chatGPT4o.prompt0.NumIslandsTestGPT4
cloneGraph.chatGPT4o.prompt0.cloneGraphchatGPT4oTest
NQueens.chatGPT4o.prompt0.NQueensTest
MedianSortedArrays.chatGPT4o.prompt0.MedianSortedArrays
RegularExpressionMatching.chatGPT4o.prompt0.RegularTest
trityp.chatGPT4o.prompt0.TritypTestChatGPT4o


In [145]:
results = dict({})
for test_class in all_test_classes:
    print("Getting results for: ", test_class.class_path)
    package = project.get_package(test_class.package)
    target_class = project.get_classes(package_filter=test_class.package)[0]
    tests_failed = False
    if (tests_failed):
        print("Test failed, skipping")
        continue
    run_coverage_reports_for_single_test_class(package, target_class, test_class)
    test_class.json_dump()
    results[test_class.class_path] = test_class.json_dump()

Getting results for:  anagrams.chatGPT4o.prompt1.AnagramsTest
Getting results for:  anagrams.chatGPT4o.prompt0.AnagramsChatGPT4o
Getting results for:  binaryTree.chatGPT4o.prompt0.binaryTreechatGPT4oTest
Error: Test class <__main__.TestClass object at 0x78d8640e2f90> failed to run any tests.
[INFO] Scanning for projects...
[INFO] 
[INFO] --------------------< com.example:my-maven-project >--------------------
[INFO] Building my-maven-project 1.0-SNAPSHOT
[INFO]   from pom.xml
[INFO] --------------------------------[ jar ]---------------------------------
[INFO] 
[INFO] --- jacoco:0.8.8:prepare-agent (default) @ my-maven-project ---
[INFO] argLine set to -javaagent:/home/chris/.m2/repository/org/jacoco/org.jacoco.agent/0.8.8/org.jacoco.agent-0.8.8-runtime.jar=destfile=/home/chris/Documents/UT/SWTest/project/QualitySoftware/target/jacoco.exec
[INFO] 
[INFO] --- resources:3.3.1:resources (default-resources) @ my-maven-project ---
[INFO] skip non existing resourceDirectory /home/chris/Docu

In [148]:
print(json.dumps(results, indent=4))

{
    "anagrams.chatGPT4o.prompt1.AnagramsTest": {
        "name": "AnagramsTest",
        "package": "anagrams",
        "llm": "chatGPT4o",
        "prompt": "prompt1",
        "class_path": "anagrams.chatGPT4o.prompt1.AnagramsTest",
        "directory_path": "../src/test/java/anagrams/chatGPT4o/prompt1/AnagramsTest.java",
        "jacoco_metrics": {
            "counters": [
                {
                    "name": "INSTRUCTION",
                    "total": 57,
                    "covered": 57,
                    "missed": 0,
                    "coverage": 100.0
                },
                {
                    "name": "BRANCH",
                    "total": 4,
                    "covered": 4,
                    "missed": 0,
                    "coverage": 100.0
                },
                {
                    "name": "LINE",
                    "total": 10,
                    "covered": 10,
                    "missed": 0,
                    "coverage": 1