In [107]:
import google.generativeai as genai
import requests
from bs4 import BeautifulSoup
import subprocess
import os
import filecmp
from glob import glob
from pathlib import Path
import pandas as pd
import sys
import subprocess

In [72]:
# Configure the Gemini client.
# The API key is taken from the GOOGLE_API_KEY environment variable so that no
# credential has to be stored in (or committed with) the notebook. When the
# variable is unset the empty string is passed, which is the value that was
# previously hard-coded here.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
# Shared model handle used by the generation cells of this notebook.
model = genai.GenerativeModel("gemini-1.5-flash")

def fetch_jira_bug_report(jira_url, timeout=30):
    """
    Fetch and parse a bug report from an Apache Jira link, including the priority.

    The page is downloaded with ``requests`` and scraped with BeautifulSoup:
    the first ``<h1>`` is used as the title, the element with id
    ``descriptionmodule`` as the description and the element with id
    ``priority-val`` as the priority. A missing element is replaced by a
    "No ... available." placeholder.

    :param jira_url: URL to the Jira issue.
    :param timeout: Seconds to wait for the server before giving up. Without a
        timeout ``requests`` can block indefinitely on a stalled connection.
    :return: Bug report details including priority as a string, or an empty
        string if the page could not be fetched or parsed (the error is printed).
    """
    try:
        response = requests.get(jira_url, timeout=timeout)
        if response.status_code != 200:
            raise Exception(f"Failed to fetch Jira URL. HTTP Status Code: {response.status_code}")

        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract bug report title
        title_element = soup.find("h1")
        title = title_element.text.strip() if title_element else "No title available."

        # Extract description
        description_element = soup.find("div", {"id": "descriptionmodule"})
        description = description_element.text.strip() if description_element else "No description available."

        # Extract priority
        priority_element = soup.find("span", {"id": "priority-val"})
        priority = priority_element.text.strip() if priority_element else "No priority available."

        # Combine title, description, and priority
        bug_report = (
            f"Title: {title}\n\n"
            f"Description: {description}\n\n"
            f"Priority: {priority}"
        )
        return bug_report
    except Exception as e:
        # Best-effort by design: callers receive "" and continue with the next bug.
        print(f"Error fetching Jira bug report: {e}")
        return ""


def compare_folders(folder1: str, folder2: str):
    """
    Compare the ``.java`` files found (recursively) in two folders.

    Prints, and returns, the files that exist in only one of the folders and
    the files that exist in both but whose contents differ. All reported
    paths are relative to their folder.

    Args:
        folder1 (str): Path to the first folder.
        folder2 (str): Path to the second folder.

    Returns:
        dict: ``"unique_files1"`` and ``"unique_files2"`` are sets of relative
        paths present in only one folder; ``"differing_files"`` is a sorted
        list of relative paths present in both folders with different content.

    Raises:
        ValueError: If either argument is not an existing directory.
    """
    if not os.path.isdir(folder1) or not os.path.isdir(folder2):
        raise ValueError("Both inputs must be valid folder paths.")

    # Get list of files with .java extension in each folder
    print(f"Comparing files in {folder1} and {folder2}")
    files1 = glob(f'{folder1}/**/*.java', recursive=True)
    files2 = glob(f'{folder2}/**/*.java', recursive=True)

    print(f"Found {len(files1)} files in {folder1}")
    for file in files1:
        print(f"  {file}")
    print(f"Found {len(files2)} files in {folder2}")
    for file in files2:
        print(f"  {file}")

    # Work with paths relative to each folder so the two trees can be matched up
    relative1 = {os.path.relpath(file, folder1) for file in files1}
    relative2 = {os.path.relpath(file, folder2) for file in files2}

    # Find files unique to each folder
    unique_files1 = relative1 - relative2
    unique_files2 = relative2 - relative1

    # Find files that are common to both folders
    common_files = relative1 & relative2

    print(f"Files unique to {folder1}:")
    for file in sorted(unique_files1):
        print(f"  {file}")

    print(f"Files unique to {folder2}:")
    for file in sorted(unique_files2):
        print(f"  {file}")

    # For each common file, compare the content.
    # shallow=False forces a byte comparison: the default (shallow=True) treats
    # two files with the same size and modification time as equal without
    # reading them, which can hide a same-length fix.
    # Iterating in sorted order keeps the result independent of set ordering.
    differing_files = [
        file
        for file in sorted(common_files)
        if not filecmp.cmp(os.path.join(folder1, file), os.path.join(folder2, file), shallow=False)
    ]

    print("Files differing in content:")
    for file in differing_files:
        print(f"  {file}")

    return {
        "unique_files1": unique_files1,
        "unique_files2": unique_files2,
        "differing_files": differing_files
    }

In [7]:
# Load the table of active bugs (one row per bug, with its Jira report URL)
# from the CSV next to the notebook, then show it as the cell result.
ACTIVE_BUGS_CSV = "active-bugs.csv"
active_bugs_df = pd.read_csv(ACTIVE_BUGS_CSV)
active_bugs_df

Unnamed: 0,bug.id,revision.id.buggy,revision.id.fixed,report.id,report.url
0,1,8f46f467a33ace44ccd9f394910940b6c48a3827,b0e1b80b6d4a10a9c9f46539bc4c7a3cce55886e,CLI-13,https://issues.apache.org/jira/browse/CLI-13
1,2,b0e1b80b6d4a10a9c9f46539bc4c7a3cce55886e,2ff9573ffb08dd52ec3a55a49f2d77a1e94efbdf,CLI-51,https://issues.apache.org/jira/browse/CLI-51
2,3,85248e8ae52232ed75c2b4c52d4071bdf192db37,d35f2fa7a06457469a617677eeb4c1dc21484006,cli-1,https://issues.apache.org/jira/browse/CLI-1
3,4,8ca630b76ebcfe24915a9edb3a6de756cab761c9,f78959477b207bf710049aa35730ef7659f0a1b9,cli-1,https://issues.apache.org/jira/browse/CLI-1
4,5,2b0a94aee899d9e7d855c402ad40eb4e318f46e7,3880640ee6268a2ecb2912e1ae896153dc2229e5,CLI-133,https://issues.apache.org/jira/browse/CLI-133
5,7,73276486b1f39510bf27cb6c0684da805b121f5d,4ee0d6c4f1b553858aeabf3ee6fb37d764f263e2,CLI-121,https://issues.apache.org/jira/browse/CLI-121
6,8,6a6bb48840aa0043200a5d9f6fffea47aea1a8db,4f407378c93b9e2a8e24c855e8bed5eb12bf7a06,CLI-151,https://issues.apache.org/jira/browse/CLI-151
7,9,298804b71d4e5f8b621f48a300f421229a3f5c86,fea35870b69c0d37ab36ab6783edbc7aacc0842c,CLI-149,https://issues.apache.org/jira/browse/CLI-149
8,10,d0f6128191443bbf49e1c9fde616f87c16486db4,d72a578a8949e4b1c437ec55997d8786836f139c,CLI-156,https://issues.apache.org/jira/browse/CLI-156
9,11,33abc034037e3ef5d85d1b03010122617c386827,d36adebd3547279b709960c902c3fb7b89a9a4ef,cli-1,https://issues.apache.org/jira/browse/CLI-1


In [116]:
def _read_sources(paths):
    """Join the given source files into one prompt section, each preceded by a ``Filename:`` header."""
    sections = []
    for path in paths:
        # errors="replace": a stray non-UTF-8 byte in a source file should not abort the run.
        source = path.read_text(encoding="utf-8", errors="replace")
        sections.append(f"Filename: {str(path)}\n\n{source}")
    # A blank line between files keeps one file's last line from running into the next header.
    return "\n\n".join(sections)


def _strip_code_fences(text):
    """Return ``text`` without its surrounding Markdown code fence; unfenced text is returned as is."""
    lines = text.strip().split("\n")
    if lines[0].lstrip().startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines)


def generated_test_case(buggy_project_folder, fixed_project_folder, bug_row, bug_id, output_folder):
    """
    Ask the model for a test covering one bug and save it as ``CustomTest_<bug_id>.java``.

    The prompt is built from the Jira report referenced by ``bug_row`` and from
    every ``.java`` file whose content differs between the buggy and the fixed
    checkout (as reported by ``compare_folders``).

    :param buggy_project_folder: Root folder of the buggy checkout.
    :param fixed_project_folder: Root folder of the fixed checkout.
    :param bug_row: Mapping-like row providing the ``'report.url'`` entry.
    :param bug_id: Identifier used in the output file name.
    :param output_folder: Folder the test file is written to; created if missing.
    :return: Path of the written file, or None if generation or writing failed
        (the error is printed).
    """
    jira_url = bug_row['report.url']
    bug_report_parsed = fetch_jira_bug_report(jira_url)
    folder_comparison = compare_folders(buggy_project_folder, fixed_project_folder)

    changed_files = folder_comparison['differing_files']
    buggy_code_text = _read_sources([Path(buggy_project_folder) / file for file in changed_files])
    fixed_code_text = _read_sources([Path(fixed_project_folder) / file for file in changed_files])

    prompt = f'''There is a bug fix in the java code, Bug report is as follows:
    {bug_report_parsed}
    Buggy Code:
    {buggy_code_text}
    Fixed Code:
    {fixed_code_text}
    Generate test cases that covers the bug. Do not include any text or explanation, provide only the test cases.
    All the test should be in a single method named generated_tests and class name should be CustomTest.
    Generated Test case should be executable both in the buggy and fixed code, and should cover the bug.
    '''

    try:
        response = model.generate_content(prompt)
        # Only remove the first/last line when they really are a Markdown fence;
        # dropping them unconditionally would cut real code out of an unfenced reply.
        test_source = _strip_code_fences(response.text)
        # write the generated test case to a file
        destination = Path(output_folder)
        destination.mkdir(parents=True, exist_ok=True)
        test_case_file = destination / f"CustomTest_{bug_id}.java"
        test_case_file.write_text(test_source, encoding="utf-8")
        return test_case_file
    except Exception as e:
        # Best-effort by design: one failing bug must not stop a batch run.
        print(f"An error occurred: {e}")
        return None



In [None]:
# Generate the test for one hand-picked bug.
bug_id = 40
bug_source_folder = f'/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/OUTPUT_BUGGY/Cli/Cli_{bug_id}_buggy'
bug_fixed_folder = f'/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/OUTPUT_FIXED/Cli/Cli_{bug_id}_fixed'
buggy_output_folder = f'/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/generated_tests/OUTPUT_BUGGY/Cli/Cli_{bug_id}_buggy/src/test/java/org/apache/commons/cli'

# A boolean-mask .loc returns a DataFrame, so row['report.url'] would be a
# Series rather than a URL string; pick the single matching row instead.
matching_rows = active_bugs_df.loc[active_bugs_df['bug.id'] == bug_id]
if matching_rows.empty:
    raise ValueError(f"bug.id {bug_id} not found in active_bugs_df")
related_bug_row = matching_rows.iloc[0]

# generated_test_case() takes exactly five arguments; the extra
# `fixed_output_folder` previously passed here is not defined anywhere in this notebook.
generated_test_case(bug_source_folder, bug_fixed_folder, related_bug_row, bug_id, buggy_output_folder)

In [117]:
# Generate a test for every bug listed in the CSV, skipping bugs for which
# either checkout contains no Java sources.
active_bugs = active_bugs_df.to_dict(orient='records')
generated_tests_folder = Path("/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/generated_tests")
for row in active_bugs:
    print("Processing bug:", row['bug.id'])
    bug_id = row['bug.id']
    report_url = row['report.url']
    bug_source_folder = f'/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_{bug_id}_buggy'
    bug_fixed_folder = f'/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_FIXED/Cli/Cli_{bug_id}_fixed'

    # Check the buggy checkout first, then the fixed one; stop at the first
    # folder that has no java files (searched recursively).
    for checkout_folder in (bug_source_folder, bug_fixed_folder):
        files = glob(f'{checkout_folder}/**/*.java', recursive=True)
        if len(files) == 0:
            print(f"No java files found in {checkout_folder}. Skipping bug.")
            break
    else:
        # Both checkouts have sources: build the prompt and write the test.
        generated_test_case(bug_source_folder, bug_fixed_folder, row, bug_id, generated_tests_folder)

Processing bug: 1
No java files found in /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_FIXED/Cli/Cli_1_fixed. Skipping bug.
Processing bug: 2
No java files found in /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_FIXED/Cli/Cli_2_fixed. Skipping bug.
Processing bug: 3
Comparing files in /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_3_buggy and /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_FIXED/Cli/Cli_3_fixed
Found 43 files in /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_3_buggy
  /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_3_buggy/CLI2ConverterTest.java
  /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_3_buggy/CLI2Converter.java
  /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OU

In [104]:
SOURCE_PATH = Path('/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli')


def _find_test_case_folder(project_folder):
    """
    Return the first directory below ``project_folder`` (in ``os.walk`` order)
    that holds a file ending in ``Test.java`` and has "test" in its path
    relative to ``project_folder``; return None if there is no such directory.
    """
    for root, dirs, files in os.walk(project_folder):
        if not any(name.endswith("Test.java") for name in files):
            continue
        # Look for "test" only in the part of the path below the checkout.
        # Matching against the absolute path would accept every directory as
        # soon as any parent folder happens to contain "test" in its name.
        if "test" in os.path.relpath(root, project_folder):
            return root
    return None


# list folders in SOURCE_PATH
with os.scandir(SOURCE_PATH) as entries:
    full_paths = [f.path for f in entries if f.is_dir()]

# Maps bug id (as a string) -> test folder, relative to SOURCE_PATH.
folder_test_case_map = {}
for folder_path in full_paths:
    test_case_folder = _find_test_case_folder(folder_path)

    print(f"Test case folder: {test_case_folder}")
    if test_case_folder:
        # Folder names look like Cli_<id>_buggy, so the id is the second-to-last "_" field.
        bug_id = folder_path.split("_")[-2]
        # take relative path to SOURCE_PATH
        test_case_folder = os.path.relpath(test_case_folder, SOURCE_PATH)
        folder_test_case_map[bug_id] = test_case_folder

Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_36_buggy/src/test/java/org/apache/commons/cli
Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_18_buggy/src/test/org/apache/commons/cli
Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_30_buggy/src/test/java/org/apache/commons/cli
Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_23_buggy/src/test/org/apache/commons/cli
Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_7_buggy/src/test/org/apache/commons/cli2
Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_fix_versions/OUTPUT_BUGGY/Cli/Cli_27_buggy/src/test/org/apache/commons/cli
Test case folder: /mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/bug_f

In [112]:
generated_test_cases_folder = '/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/generated_tests'
# list all files in generated_test_cases_folder
files = glob(f'{generated_test_cases_folder}/**/*.java', recursive=True)
# The loop variable is deliberately not called `generated_test_case`: that is
# the name of the generation function defined earlier in this notebook, and
# rebinding it to a string would break every later call to the function.
for generated_test_path in files:
    # File names look like CustomTest_<id>.java; the id is the last "_" field without the extension.
    bug_id = generated_test_path.split("_")[-1].split(".")[0]
    print(f"Bug ID: {bug_id}")
    

Bug ID: 12
Bug ID: 17
Bug ID: 40
Bug ID: 33
Bug ID: 36
Bug ID: 26
Bug ID: 39
Bug ID: 30
Bug ID: 14
Bug ID: 20
Bug ID: 19
Bug ID: 18
Bug ID: 11
Bug ID: 13
Bug ID: 25
Bug ID: 5
Bug ID: 29
Bug ID: 4
Bug ID: 10
Bug ID: 23
Bug ID: 7
Bug ID: 35
Bug ID: 22
Bug ID: 38
Bug ID: 9
Bug ID: 8
Bug ID: 27
Bug ID: 16
Bug ID: 31
Bug ID: 28
Bug ID: 21
Bug ID: 37
Bug ID: 15
Bug ID: 34
Bug ID: 3
Bug ID: 32
Bug ID: 24


In [113]:
# copy all test cases to dest paths in docker container
CONTAINER_ID = "c758b445f967"

generated_test_cases_folder = '/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/generated_tests'
# list all files in generated_test_cases_folder
files = glob(f'{generated_test_cases_folder}/**/*.java', recursive=True)
# The loop variable is deliberately not called `generated_test_case`: that is
# the name of the generation function defined earlier in this notebook.
for generated_test_path in files:
    # File names look like CustomTest_<id>.java; the id is the last "_" field without the extension.
    bug_id = generated_test_path.split("_")[-1].split(".")[0]

    # Bugs for which no test folder was discovered have no entry in the map.
    dest_path = folder_test_case_map.get(str(bug_id))
    if dest_path is None:
        print(f"No test folder known for bug {bug_id}. Skipping {generated_test_path}.")
        continue

    cmd_command = [
        "docker",
        "cp",
        # Copy the file the glob actually found (it may live in a sub-folder).
        generated_test_path,
        f"{CONTAINER_ID}:/usr/ressources/OUTPUT_BUGGY/Cli/{dest_path}/CustomTest.java"
    ]

    # run the command; check=True raises CalledProcessError if docker cp fails
    subprocess.run(cmd_command, check=True)

In [19]:
# Build a single-file prompt for the bug currently held in `report_url`,
# `bug_source_folder` and `bug_fixed_folder`.
# `bug_report_parsed` and `folder_comparison` are otherwise only created as
# local variables inside generated_test_case(), so they are computed here to
# keep this cell runnable on a fresh kernel.
bug_report_parsed = fetch_jira_bug_report(report_url)
folder_comparison = compare_folders(bug_source_folder, bug_fixed_folder)

differing_files = folder_comparison['differing_files']
if not differing_files:
    raise ValueError(f"No differing java files between {bug_source_folder} and {bug_fixed_folder}")

# Only the first differing file is used for this prompt.
buggy_code_path = Path(bug_source_folder) / differing_files[0]
fixed_code_path = Path(bug_fixed_folder) / differing_files[0]

buggy_code = buggy_code_path.read_text()
fixed_code = fixed_code_path.read_text()

prompt = f'''There is a bug fix in the java code, Bug report is as follows:
{bug_report_parsed}
Buggy Code:
{buggy_code}
Fixed Code:
{fixed_code}
Generate test cases that covers the bug. Do not include any text or explanation, provide only the test cases.
'''

In [20]:
# Send the prompt to the Gemini model and print its reply.
# `response` is bound at notebook level; the cell that saves the reply to a
# file reads it again.
try:
    response = model.generate_content(prompt)
    print("Generated Unit Tests:\n")
    print(response.text.strip())
except Exception as e:
    # Best-effort: report the failure instead of raising. Note that `response`
    # is not (re)bound when generate_content itself fails.
    print(f"An error occurred: {e}")

Generated Unit Tests:

```java
import org.apache.commons.cli.Util;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

public class UtilTest {

    @Test
    public void testStripLeadingHyphensNull() {
        assertNull(Util.stripLeadingHyphens(null));
    }

    @Test
    public void testStripLeadingHyphensEmpty() {
        assertEquals("", Util.stripLeadingHyphens(""));
    }

    @Test
    public void testStripLeadingHyphensSingleHyphen() {
        assertEquals("abc", Util.stripLeadingHyphens("-abc"));
    }

    @Test
    public void testStripLeadingHyphensDoubleHyphen() {
        assertEquals("abc", Util.stripLeadingHyphens("--abc"));
    }

    @Test
    public void testStripLeadingHyphensNoHyphen() {
        assertEquals("abc", Util.stripLeadingHyphens("abc"));
    }
}
```



In [23]:
# Keep the raw model reply under its own name: assigning it to
# `generated_test_case` would overwrite the function of that name defined
# earlier in this notebook.
generated_test_case_text = response.text.strip()
# write the generated test case to a file
test_case_file = Path('/mnt/seconddisk/master/courses/cs588/playgrounds.ipynb/generated_tests') / "generated_test_case.txt"
test_case_file.write_text(generated_test_case_text)

828