# Requirements

In [30]:
!pip install -q pytest pytest-cov pytest-assume python-dotenv

In [2]:
from huggingface_hub import login

# Paste your token when prompted
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

---

### Loading data

In [3]:
from datasets import load_dataset
import pandas as pd
import os

n_problems = 10 #
n_samples_per_prompt = 5
print("Loading dataset...")

dataset = load_dataset("dz1/CodeScore-MBPP-ET", split="train")[:n_problems]
df_tests = pd.DataFrame({'tests': dataset["test_list"],})
print(df_tests.head)

Loading dataset...
<bound method NDFrame.head of                                                tests
0  [assert min_cost([[1, 2, 3], [4, 8, 2], [1, 5,...
1  [assert similar_elements((3, 4, 5, 6),(5, 7, 4...
2  [assert is_not_prime(2) == False, assert is_no...
3  [assert heap_queue_largest( [25, 35, 22, 85, 1...
4  [assert count_ways(2) == 3, assert count_ways(...
5  [assert differ_At_One_Bit_Pos(13,9) == True, a...
6  [assert find_char_long('Please move back to st...
7  [assert square_nums([1, 2, 3, 4, 5, 6, 7, 8, 9...
8  [assert find_Rotations("aaaa") == 1, assert fi...
9  [assert small_nnum([10, 20, 50, 70, 90, 20, 50...>


In [None]:


try:
    # Load your CSVs
    df_code = pd.read_csv('model_outputs_zephyr_cot.csv', usecols=['code'])
    print(f"Loaded {len(df_code)} code snippets.")
    print(df_code.head)
except FileNotFoundError:
    print("ERROR: Could not find .csv.")
    print("Please make sure your files are in the correct location.")
except ValueError as e:
    print(f"ERROR: A column might be missing. Make sure you have a 'code' and 'tests' column.")
    print(f"Details: {e}")

Loaded 50 code snippets.
<bound method NDFrame.head of                                                  code
0   def min_cost(cost):\n    n = len(cost[0])\n   ...
1   def min_cost(cost, m, n):\n    # Initialize th...
2   def min_cost(cost):\n    m, n = len(cost), len...
3   def min_cost(cost, m, n):\n    # Initialize th...
4   def min_cost(cost):\n    m, n = len(cost), len...
5   def similar_elements(tuple1, tuple2):\n    ret...
6   def similar_elements(list1, list2):\n    set1 ...
7   def similar_elements(list1, list2):\n    # Cre...
8   def similar_elements(tuple1, tuple2):\n    sim...
9   def similar_elements(list1, list2):\n    set1 ...
10  def is_not_prime(n):\n    chain_of_thought = "...
11  def is_not_prime(n):\n    chain_of_thought = [...
12  def is_not_prime(n):\n    if n <= 1:\n        ...
13  def is_not_prime(n):\n    # Iterate through al...
14  def is_not_prime(n):\n    chain_of_thought = [...
15  import heapq\n\ndef heap_queue_largest(numbers...
16  import heapq\n\ndef hea

### Preparing code for tests

In [13]:
import os
import math
import pandas as pd

def _decode_text(s):
    """Decode escaped sequences if s is a string, else return as-is."""
    if not isinstance(s, str):
        return s
    try:
        return s.encode("utf-8").decode("unicode_escape")
    except Exception:
        return s

def _normalize_tests_for_injection(tests_cell):
    """
    Return a list of test lines (strings) to insert inside the test function.
    Handles:
      - actual Python lists of assertions
      - string like "[assert ... , assert ...]"
      - multiline strings with '\n' escapes
    """
    if isinstance(tests_cell, list):
        # assume already list of assertion strings
        return [ _decode_text(str(x)).strip() for x in tests_cell if str(x).strip() ]

    if not isinstance(tests_cell, str):
        return []

    s = _decode_text(tests_cell).strip()

    # strip surrounding brackets if present
    if s.startswith("[") and s.endswith("]"):
        s = s[1:-1].strip()

    # if multiple assertions separated by ', assert' -> split nicely
    # also handle actual newlines
    if "\n" in s:
        lines = [line.strip() for line in s.splitlines() if line.strip()]
    elif ", assert" in s:
        # keep the leading "assert" on the first element
        parts = s.split(", assert")
        lines = []
        for idx, p in enumerate(parts):
            p = p.strip()
            if idx > 0 and not p.startswith("assert"):
                p = "assert " + p
            lines.append(p)
    elif "assert" in s:
        # single-line with one assert (or semicolon-separated)
        if ";" in s:
            parts = [p.strip() for p in s.split(";") if p.strip()]
            lines = [p if p.startswith("assert") else ("assert " + p) for p in parts]
        else:
            lines = [s]
    else:
        # no 'assert' found; treat whole thing as single line
        lines = [s] if s else []

    # final clean-up: ensure each line is a valid expression string
    return [line for line in lines if line]

def generate_test_files(df_code, df_tests, output_dir="generated_tests"):
    """
    For each row in df_tests (index t_idx), create up to 5 files corresponding to
    code rows at indices (t_idx*5 + iter_idx) where iter_idx in 0..4.
    Each file contains the code snippet (decoded from escaped form) followed by
    a test function `test_generated_snippet()` containing the test assertions.
    Files are named: problem_{t_idx}_iteration_{iter_idx}.py
    """
    os.makedirs(output_dir, exist_ok=True)

    n_code = len(df_code)
    n_tests = len(df_tests)

    for t_idx, tests_row in df_tests.iterrows():
        raw_tests = tests_row.get("tests", "")
        test_lines = _normalize_tests_for_injection(raw_tests)

        for iter_idx in range(5):
            code_idx = t_idx * 5 + iter_idx
            if code_idx >= n_code:
                # no corresponding code row — skip
                continue

            code_cell = df_code.iloc[code_idx].get("code", "")
            if not isinstance(code_cell, str) or not code_cell.strip():
                # skip empty code cells
                continue

            # decode escaped newlines/tabs etc.
            code_text = _decode_text(code_cell).rstrip()

            # ensure code ends with exactly one newline
            if not code_text.endswith("\n"):
                code_text = code_text + "\n"

            # build the test function body
            file_lines = []
            file_lines.append(code_text)
            file_lines.append("def test_generated_snippet():\n")

            if test_lines:
                for line in test_lines:
                    # indent each test line with 4 spaces
                    file_lines.append(f"    {line.rstrip()}\n")
            else:
                file_lines.append("    pass\n")

            # final content
            file_content = "".join(file_lines)

            # write file
            filename = os.path.join(output_dir, f"problem_{t_idx}_iteration_{iter_idx}.py")
            with open(filename, "w", encoding="utf-8") as f:
                f.write(file_content)

    print(f"✅ Generated test files in '{output_dir}' (tests outer loop, code inner loop).")

generate_test_files(df_code, df_tests)


# Example usage:
# generate_test_files(df_code, df_tests)


NameError: name 'df_code' is not defined

# Baseline Coverage 

In [121]:
import os
import subprocess
import sys
import xml.etree.ElementTree as ET
import re
from pathlib import Path

# --- Configuration ---
test_directory = "./tests"
code_directory = "./generated_code"
report_file = "pytest_report.xml"  # Temp file to store test results
# --- End Configuration ---


def run_all_tests(test_dir_str: str, src_dir_str: str):
    test_dir = Path(test_dir_str)
    src_dir = Path(src_dir_str)

    if not test_dir.is_dir():
        print(f"Error: Test folder not found: {test_dir.resolve()}")
        return
    if not src_dir.is_dir():
        print(f"Error: Code folder not found: {src_dir.resolve()}")
        return

    print("Starting test run...")
    print(f"Running tests in: {test_dir.resolve()}")
    print(f"Measuring coverage for: {src_dir.resolve()}")
    print("=" * 70)
    
    subprocess.run("coverage erase", shell=True, capture_output=True)
    
    # --- Main Execution Command ---
    # We've removed -v and -s, and added --junit-xml
    command = [
        sys.executable,
        "-m", "pytest",
        f"--cov={src_dir}",
        "--cov-branch",
        f"--junit-xml={report_file}",
        test_dir
    ]
    
    # Run pytest. Output will be cleaner now.
    subprocess.run(command, text=True)
    
    # --- 1. Final Coverage Report ---
    print("=" * 70)
    print("All tests finished.")
    print("Generating final combined coverage report...")
    print("=" * 70)
    
    subprocess.run([sys.executable, "-m", "coverage", "report", "-m"])
    subprocess.run([sys.executable, "-m", "coverage", "html"])
    
    report_path = Path.cwd() / 'htmlcov' / 'index.html'
    print(f"\n✅ Full, non-truncated HTML report generated!")
    print(f"View in browser: {report_path}")

    # --- 2. NEW: Test Pass/Fail Summary ---
    print("\n" + "=" * 70)
    print("Test Pass/Fail Summary")
    print("=" * 70)

    try:
        tree = ET.parse(report_file)
        root = tree.getroot()
        
        test_results = {}
        for testcase in root.findall('.//testcase'):
            name = testcase.get('name')
            status = "PASSED"
            if testcase.find('failure') is not None:
                status = "FAILED"
            elif testcase.find('skipped') is not None:
                status = "SKIPPED"
            
            # Extract iteration name from [iteration_name.py]
            match = re.search(r'\[([^\]]+)\]', name)
            if match:
                iteration_name = match.group(1)
                # Handle cases where loading failed (from your test file)
                if "FAILED_LOAD" in iteration_name:
                    iteration_name = iteration_name.replace("FAILED_LOAD: ", "")
                    status = "FAILED_LOAD"
                test_results[iteration_name] = status

        if not test_results:
            print("No test results found. Did pytest discover any tests?")
            return

        print(f"{'Iteration File':<45} {'Status':<10}")
        print("-" * 55)
        
        passed_count = 0
        failed_count = 0
        
        for name, status in sorted(test_results.items()):
            print(f"{name:<45} {status:<10}")
            if status == "PASSED":
                passed_count += 1
            else:
                failed_count += 1
        
        print("-" * 55)
        print(f"Total Passed: {passed_count}")
        print(f"Total Failed: {failed_count} (includes FAILED_LOAD)")
        
    except FileNotFoundError:
        print(f"Error: Could not find report file: {report_file}")
    except ET.ParseError:
        print(f"Error: Could not parse XML report file.")
    finally:
        # Clean up the temp report file
        if os.path.exists(report_file):
            os.remove(report_file)
            
    print("=" * 70)
    print("Script finished.")

# --- Run the function ---
run_all_tests(test_directory, code_directory)

Starting test run...
Running tests in: /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2/tests
Measuring coverage for: /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2/generated_code
platform linux -- Python 3.10.12, pytest-8.4.2, pluggy-1.6.0
rootdir: /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2
plugins: assume-2.4.3, cov-7.0.0, anyio-4.9.0
collected 48 items

tests/test_0.py [31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31m                                                    [ 10%][0m
tests/test_1.py [31mF[0m[31mF[0m[31mF[0m[31mF[0m[31mF[0m[31m                                                    [ 20%][0m
tests/test_2.py [31mF[0m[31mF[0m[31mF[0m[32m.[0m[31mF[0m[31m                                                    [ 31%][0m
tests/test_3.py [31mF[0m[31mF[0m[32m.[0m[31mF[0m[31mF[0m[31m                                                    [ 41%][0m
tests/test_4.py [31mF[0m[31mF[0m[31mF[0m[31m    

# LLM-Assisted Test Generation & Coverage Improvement

In [134]:
import os
import subprocess
import sys
from pathlib import Path

# --- Configuration ---
test_directory = "./tests_new"
code_directory = "./generated_code_new"
html_dir = "htmlcov_new"  # The new report folder
# --- End Configuration ---


def run_all_tests_in_folders(test_dir_str: str, code_dir_str: str, html_dir_str: str):
    """
    ERASES old coverage data, then runs ALL tests in the 'test_dir_str'
    while measuring coverage ONLY on 'code_dir_str'.
    Saves a clean, combined report to 'html_dir_str'.
    """
    test_dir = Path(test_dir_str)
    code_dir = Path(code_dir_str)

    # --- Validation ---
    if not test_dir.is_dir():
        print(f"Error: Test folder not found: {test_dir.resolve()}")
        return
    if not code_dir.is_dir():
        print(f"Error: Code folder not found: {code_dir.resolve()}")
        return

    print("Starting clean test run...")
    print(f"Finding tests in:   {test_dir.resolve()}")
    print(f"Measuring code in:  {code_dir.resolve()}")
    print("=" * 70)
    
    # --- 1. ERASE old coverage data ---
    print("Erasing old coverage data...")
    subprocess.run("coverage erase", shell=True, capture_output=True)
    
    # --- 2. Main Execution Command ---
    # We tell 'coverage run' to measure the 'code_directory'
    # and tell 'pytest' to find all tests in 'test_directory'
    command = [
        sys.executable,
        "-m", "coverage", "run",
        f"--source={code_dir_str}",  # <-- Tell coverage WHAT to measure
        "--branch",
        "-m", "pytest",              # <-- Tell coverage to run pytest
        "-v",
        "-s",
        test_dir_str                 # <-- Tell pytest WHERE to find tests
    ]
    
    # Run pytest via coverage
    subprocess.run(command, text=True)
    
    # --- 3. Final Report ---
    print("=" * 70)
    print("Test run finished.")
    print("Generating new, clean reports...")
    print("=" * 70)
    
    # Generate the terminal report
    report_command = [
        sys.executable,
        "-m", "coverage", "report",
        "-m"
    ]
    subprocess.run(report_command)
    
    # Generate the HTML report
    html_command = [
        sys.executable,
        "-m", "coverage", "html",
        "-d", html_dir_str
    ]
    subprocess.run(html_command)
    
    report_path = Path.cwd() / html_dir_str / 'index.html'
    
    if report_path.is_file():
        print(f"\n✅ New, clean HTML report generated!")
        print(f"View in browser: {report_path}")
    else:
        print("\n❌ Error: HTML report was not generated.")
        print("This likely means no coverage data was collected.")

    print("=" * 70)
    print("Script finished.")

# --- Run the function ---
run_all_tests_in_folders(test_directory, code_directory, html_dir)

Starting clean test run...
Finding tests in:   /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2/tests_new
Measuring code in:  /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2/generated_code_new
Erasing old coverage data...
platform linux -- Python 3.10.12, pytest-8.4.2, pluggy-1.6.0 -- /usr/bin/python3
cachedir: .pytest_cache
rootdir: /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2
plugins: assume-2.4.3, cov-7.0.0, anyio-4.9.0
[1mcollecting ... [0mcollected 2 items

tests_new/test_0.py::test_min_path [32mPASSED[0m
tests_new/test_1.py::test_gpa_converter [32mPASSED[0m

Test run finished.
Generating new, clean reports...
Name                              Stmts   Miss Branch BrPart  Cover   Missing
-----------------------------------------------------------------------------
generated_code_new/problem_0.py      28      1     18      2    93%   5, 15->24
generated_code_new/problem_1.py      29      3     26      3    89%   10, 18, 20
-------

In [32]:
import os

def create_coverage_prompt(code_filepath, tests_filepath):
    """
    Reads code and test files to build a prompt for improving test coverage.

    Args:
        code_filepath (str): The path to the Python file with the code to analyze.
        tests_filepath (str): The path to the Python file with the existing tests.

    Returns:
        str: The fully constructed prompt, or an error message if files are not found.
    """
    try:
        # Step 1: Read the code snippet from its file
        with open(code_filepath, 'r') as f:
            code_snippet = f.read()
            
        # Step 2: Read the existing tests from their file
        with open(tests_filepath, 'r') as f:
            existing_tests = f.read()

        # Step 3: Construct the prompt using an f-string
        # We add Markdown code fences (```python ... ```)
        # as this is a best practice for clarity when prompting LLMs.
        prompt = (
            f"Improve the branch coverage for the following code snippet\n"
            f"```python\n{code_snippet}\n```\n\n"
            f"by adding tests specifically to cover the branch coverage to jump from line 15 to 24 to these existing tests\n"
            f"```python\n{existing_tests}\n```"
        )
        
        return prompt

    except FileNotFoundError as e:
        return f"[Error] Could not find file: {e.filename}"
    except Exception as e:
        return f"[Error] An unexpected error occurred: {e}"

# --- Main execution to demonstrate the function ---
if __name__ == "__main__":
    
    code_file_path = "./generated_code_new/problem_0.py"
    test_file_path = "./tests_new/test_0_iteration_3.py"


    
final_prompt = create_coverage_prompt(code_file_path, test_file_path)

# --- 5. Print the final prompt ---
print("--- GENERATED PROMPT ---")
print(final_prompt)
print("------------------------")

--- GENERATED PROMPT ---
Improve the branch coverage for the following code snippet
```python
def min_path(grid, k):

    N = len(grid)
    if N == 0:
        return []

    # --- Chain of Thought ---
    # 1. The problem asks for a lexicographically minimum path.
    # 2. This means we must prioritize making the first step as small as
    #    possible. Since we can start anywhere, we *must* start at the
    #    smallest value in the grid, which is 1.
    
    # Find the starting coordinates of the cell with value 1
    start_r, start_c = -1, -1
    for r in range(N):
        for c in range(N):
            if grid[r][c] == 1:
                start_r, start_c = r, c
                break
        if start_r != -1:
            break
            
    # 3. Initialize our path and current position.
    path_values = [1]
    current_r, current_c = start_r, start_c
    
    # 4. We need to take k-1 more steps.
    # 5. For each step, to maintain the lexicographical minimum,
    #    we *must

### Generate Prompt using gemini api

In [33]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

load_dotenv()

# Get the API key from the environment
api_key = os.getenv("api_key")

genai.configure(api_key=api_key)
# Create the model object
model = genai.GenerativeModel('gemini-2.5-flash')

response = model.generate_content(final_prompt)

# Print the response
print(response.text)

The request to "jump from line 15 to 24" in the provided code snippet needs careful interpretation due to the code's structure and typical line numbering. Let's re-number the relevant part of the snippet to clarify:

```python
14     start_r, start_c = -1, -1
15     for r in range(N):
16         for c in range(N):
17             if grid[r][c] == 1:
18                 start_r, start_c = r, c
19                 break # Inner loop break
20         if start_r != -1: # Branch point: True if 1 found in current/prev row, False otherwise
21             break # Outer loop break
22             
23     # 3. Initialize our path and current position.
24     path_values = [1] # Target line: Initialize path
```

Based on this numbering:
- **Line 15:** `for r in range(N):` (The start of the outer loop for finding `1`).
- **Line 24:** `path_values = [1]` (The line that initializes the path after `1` has been found, or after the search for `1` has completed).

The prompt "jump from line 15 to 24" likely

### Test iterations

In [15]:
import os
import subprocess
import sys
import re
import json
from pathlib import Path
from collections import defaultdict

# --- Configuration ---
# Folders created by the setup script
test_directory = "./tests_new"
code_directory = "./generated_code_new"
html_dir = "htmlcov_iteration_4"  # The final report folder
# --- End Configuration ---


def measure_coverage_iterations(test_dir_str: str, code_dir_str: str, html_dir_str: str):
    """
    Finds all 'test_..._iteration_N.py' files, then runs tests cumulatively
    for each iteration (0, 0+1, 0+1+2, etc.) and measures coverage.
    
    Parses coverage results *per problem* (e.g., problem_0, problem_1)
    and prints a summary table of coverage improvement for each.
    """
    test_dir = Path(test_dir_str)
    code_dir = Path(code_dir_str)

    # --- 1. Validation ---
    if not test_dir.is_dir():
        print(f"Error: Test folder not found: {test_dir.resolve()}")
        print("Please make sure df are loaded first.")
        return
    if not code_dir.is_dir():
        print(f"Error: Code folder not found: {code_dir.resolve()}")
        print("Please make sure df are loaded first.")

        return

    # --- 2. Find and sort all test files by iteration ---
    all_test_files = list(test_dir.rglob("test_*.py"))
    iteration_files = {}
    max_iter = -1
    
    test_file_pattern = re.compile(r"_iteration_(\d+)\.py$")

    for file in all_test_files:
        match = test_file_pattern.search(file.name)
        if match:
            iter_num = int(match.group(1))
            max_iter = max(max_iter, iter_num)
            
            if iter_num not in iteration_files:
                iteration_files[iter_num] = []
            iteration_files[iter_num].append(file)

    if max_iter == -1:
        print(f"Error: No test files matching the pattern '*_iteration_N.py' found in {test_dir.resolve()}")
        return

    print("Found test iterations 0 through " + str(max_iter))
    print(f"Measuring code in: {code_dir.resolve()}")
    print("=" * 80)

    # --- 3. Run iterations cumulatively ---
    coverage_results = []
    current_test_files = []
    
    # We will save the json report for each iteration
    json_report_files = []

    for i in range(max_iter + 1):
        if i not in iteration_files:
            print(f"\n--- Skipping Iteration {i} (No test files found) ---")
            continue
            
        current_test_files.extend(iteration_files[i])
        
        print(f"\n--- Running Iteration {i} ({len(current_test_files)} cumulative test files) ---")
        
        # 1. Erase old data
        subprocess.run([sys.executable, "-m", "coverage", "erase"], capture_output=True)
        
        # 2. Build and run pytest command
        command = [
            sys.executable,
            "-m", "coverage", "run",
            f"--source={code_dir_str}",
            "--branch",
            "-m", "pytest",
            "-v",
            "-s"
        ]
        # Add all test files found so far (cumulative)
        command.extend([str(f) for f in current_test_files])
        
        subprocess.run(command, capture_output=True, text=True)
        
        # 3. Generate JSON report for parsing
        json_report_file = f".coverage_iter_{i}.json"
        json_report_files.append(json_report_file)
        
        json_cmd = [
            sys.executable,
            "-m", "coverage", "json",
            "-o", json_report_file
        ]
        subprocess.run(json_cmd, capture_output=True)
        
        # 4. Parse JSON report and store results (PER PROBLEM)
        try:
            with open(json_report_file) as f:
                data = json.load(f)
            
            # Use defaultdict to store stats per problem
            problem_stats = defaultdict(lambda: {
                "statements": 0,
                "covered": 0,
                "branches": 0,
                "covered_br": 0,
                "missing_br": 0
            })

            # Process each file from the JSON report
            for file_key, file_data in data.get('files', {}).items():
                file_path = Path(file_key)
                
                try:
                    # Find which problem this file belongs to
                    relative_path = file_path.relative_to(code_dir)
                    if not relative_path.parts:
                        continue
                    problem_name = relative_path.parts[0] # e.g., "problem_0"
                    
                    # Aggregate stats
                    summary = file_data.get('summary', {})
                    stats = problem_stats[problem_name]
                    stats["statements"] += summary.get('num_statements', 0)
                    stats["covered"] += summary.get('covered_lines', 0)
                    stats["branches"] += summary.get('num_branches', 0)
                    stats["covered_br"] += summary.get('covered_branches', 0)
                    stats["missing_br"] += summary.get('missing_branches', 0)

                except ValueError:
                    # File is not relative to code_dir, skip it
                    continue

            # Calculate percentages and store results for this iteration
            if not problem_stats:
                print("  Warning: No files under 'source' directory were found in coverage report.")

            for problem, stats in problem_stats.items():
                total_cov = (stats["covered"] / stats["statements"]) * 100 if stats["statements"] > 0 else 0.0
                branch_cov = (stats["covered_br"] / stats["branches"]) * 100 if stats["branches"] > 0 else 0.0
                
                coverage_results.append({
                    'iter': i,
                    'problem': problem,
                    'total': total_cov,
                    'branch': branch_cov,
                    'missing_br': stats["missing_br"],
                    'files': len(current_test_files)
                })
                print(f"  -> Problem {problem}: Total Coverage {total_cov:.2f}%, Branch Coverage {branch_cov:.2f}%")
        
        except (FileNotFoundError, json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Could not parse report for iteration {i}. Error: {e}")
            coverage_results.append({
                'iter': i, 'problem': f'ERROR (iter {i})', 'total': 0.0, 'branch': 0.0, 'missing_br': 0, 'files': len(current_test_files)
            })

    # --- 4. Generate Final HTML Report (from the last run) ---
    print("=" * 80)
    print("Generating final HTML report from last iteration...")
    html_command = [
        sys.executable,
        "-m", "coverage", "html",
        "-d", html_dir_str
    ]
    subprocess.run(html_command, capture_output=True)
    
    report_path = Path.cwd() / html_dir_str / 'index.html'
    if report_path.is_file():
        print(f"✅ Final HTML report generated! View in browser: {report_path}")
    else:
        print("❌ Error: Final HTML report was not generated.")
    
    # --- 5. Print Summary Table ---
    print("\n\n" + "=" * 95)
    print(" " * 30 + "COVERAGE IMPROVEMENT SUMMARY (PER PROBLEM)")
    print("=" * 95)
    print(f"{'Iteration':<10} | {'Problem':<12} | {'Test Files':<10} | {'Total Cov.':<12} | {'Branch Cov.':<12} | {'Missing Br.':<12} | {'Improvement':<15}")
    print("-" * 95)

    last_totals_per_problem = {}

    for result in coverage_results:
        iter_num = result['iter']
        problem = result['problem']
        files = result['files']
        total = result['total']
        branch = result['branch']
        missing_br = result['missing_br']
        
        # Calculate improvement from last iteration *for this specific problem*
        last_total = last_totals_per_problem.get(problem, 0.0)
        
        if problem not in last_totals_per_problem:
            improvement_str = "(Base)"
        else:
            improvement = total - last_total
            improvement_str = f"({improvement:+.2f}%)"
        
        print(f"{iter_num:<10} | {problem:<12} | {files:<10} | {total:<12.2f}% | {branch:<12.2f}% | {missing_br:<12} | {improvement_str:<15}")
        
        # Update the last total for this problem
        last_totals_per_problem[problem] = total
    
    print("=" * 95)

    # --- 6. Cleanup ---
    print("\nCleaning up temporary files...")
    for f in json_report_files:
        if os.path.exists(f):
            os.remove(f)
    if os.path.exists(".coverage"):
        os.remove(".coverage")
    print("Done.")


# --- Run the main function ---
if __name__ == "__main__":
    measure_coverage_iterations(test_directory, code_directory, html_dir)

Found test iterations 0 through 3
Measuring code in: /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2/generated_code_new

--- Running Iteration 0 (2 cumulative test files) ---
  -> Problem problem_0.py: Total Coverage 96.43%, Branch Coverage 88.89%
  -> Problem problem_0_buggy.py: Total Coverage 0.00%, Branch Coverage 0.00%
  -> Problem problem_1.py: Total Coverage 89.66%, Branch Coverage 88.46%

--- Running Iteration 1 (4 cumulative test files) ---
  -> Problem problem_0.py: Total Coverage 100.00%, Branch Coverage 94.44%
  -> Problem problem_0_buggy.py: Total Coverage 0.00%, Branch Coverage 0.00%
  -> Problem problem_1.py: Total Coverage 100.00%, Branch Coverage 100.00%

--- Running Iteration 2 (5 cumulative test files) ---
  -> Problem problem_0.py: Total Coverage 100.00%, Branch Coverage 94.44%
  -> Problem problem_0_buggy.py: Total Coverage 0.00%, Branch Coverage 0.00%
  -> Problem problem_1.py: Total Coverage 100.00%, Branch Coverage 100.00%

--- Running Iteration 3 

# Fault Detection Check

In [29]:
import os
import subprocess
import sys
from pathlib import Path

# --- Configuration (Set your paths here) ---
# The directory containing the code you want to measure
CODE_DIRECTORY_TO_MEASURE = "./generated_code_new" 

# The specific buggy code file (for our reference)
CODE_FILE_BEING_TESTED = "./generated_code_new/problem_0_buggy.py" 

# The single test file you want to run
TEST_FILE_TO_USE = "./tests_new/test_0_buggy.py" 

# The folder where the HTML report will be created
HTML_REPORT_DIR = "htmlcov_buggy_del"
# --- End Configuration ---


def measure_single_file_coverage(code_dir_str: str, test_file_str: str, html_dir_str: str):
    """
    Runs a single test file against a source directory using
    pytest-cov and generates a terminal and HTML report.
    """
    code_dir = Path(code_dir_str)
    test_file = Path(test_file_str)

    # --- 1. Validation ---
    if not code_dir.is_dir():
        print(f"Error: Code directory not found: {code_dir.resolve()}")
        return
    if not test_file.is_file():
        print(f"Error: Test file not found: {test_file.resolve()}")
        return
    
    # Check for the __init__.py file
    init_file = code_dir / "__init__.py"
    if not init_file.is_file():
        print(f"Warning: Missing {init_file.resolve()}")
        print("         Please create this empty file to ensure imports work correctly.")

    print("=" * 80)
    print(f"Measuring coverage for: {code_dir_str}")
    print(f"Running test file:     {test_file.name}")
    print("=" * 80)

    # --- 2. Erase old coverage data ---
    subprocess.run([sys.executable, "-m", "coverage", "erase"], 
                   capture_output=True)

    # --- 3. Run pytest with coverage ---
    command = [
        sys.executable,
        "-m", "coverage", "run",
        f"--source={code_dir_str}",  # <-- THE FIX: Point source at the DIRECTORY
        "--branch",
        "-m", "pytest",
        "-v",
        test_file_str  # Run this specific test file
    ]
    
    # We don't capture output, so you can see the pytest failure
    print(f"Running command: {' '.join(command)}\n")
    subprocess.run(command)

    # --- 4. Show simple terminal report ---
    print("\n" + "=" * 80)
    print("Terminal Coverage Report (shows missing lines)")
    print("=" * 80)
    report_cmd = [
        sys.executable,
        "-m", "coverage", "report",
        "-m",  # Show missing lines
        
        # We can also add the file we care about here to filter the report
        CODE_FILE_BEING_TESTED 
    ]
    subprocess.run(report_cmd)

    # --- 5. Generate Final HTML Report ---
    print("\n" + "=" * 80)
    print("Generating final HTML report...")
    html_command = [
        sys.executable,
        "-m", "coverage", "html",
        "-d", html_dir_str
    ]
    subprocess.run(html_command, capture_output=True)
    
    report_path = Path.cwd() / html_dir_str / 'index.html'
    if report_path.is_file():
        # Check if the file we care about is in the report
        file_report_path = Path.cwd() / html_dir_str / f"{Path(CODE_FILE_BEING_TESTED).name}.html"
        
        print(f"Tinal HTML report generated! View in browser:")
        print(f"   {report_path.as_uri()}")
        
        if file_report_path.is_file():
             print(f"\nDirect link to your file's report:\n   {file_report_path.as_uri()}")
        else:
             print(f"\nWarning: Your specific file ({CODE_FILE_BEING_TESTED}) was not found in the HTML report.")

    else:
        print("Error: Final HTML report was not generated.")
        print("   This still means 'coverage run' collected no data.")
    
    # --- 6. Cleanup ---
    if os.path.exists(".coverage"):
        os.remove(".coverage")
    print("Done.")


# --- Run the main function ---
if __name__ == "__main__":
    measure_single_file_coverage(
        CODE_DIRECTORY_TO_MEASURE, 
        TEST_FILE_TO_USE, 
        HTML_REPORT_DIR
    )

Measuring coverage for: ./generated_code_new
Running test file:     test_0_buggy.py
Running command: /usr/bin/python3 -m coverage run --source=./generated_code_new --branch -m pytest -v ./tests_new/test_0_buggy.py

platform linux -- Python 3.10.12, pytest-8.4.2, pluggy-1.6.0 -- /usr/bin/python3
cachedir: .pytest_cache
rootdir: /home/vincent/Documents/unirepo/WiSe25_26/soft_eng/assignment2
plugins: assume-2.4.3, cov-7.0.0, anyio-4.9.0
[1mcollecting ... [0mcollected 1 item

tests_new/test_0_buggy.py::test_min_path [31mFAILED[0m[31m                          [100%][0m

[31m[1m________________________________ test_min_path _________________________________[0m

tp = <class 'pytest_assume.plugin.FailedAssumption'>, value = None, tb = None

    [0m[94mdef[39;49;00m [92mreraise[39;49;00m(tp, value, tb=[94mNone[39;49;00m):[90m[39;49;00m
        [94mtry[39;49;00m:[90m[39;49;00m
            [94mif[39;49;00m value [95mis[39;49;00m [94mNone[39;49;00m:[90m[39;49;00m
    