In [1]:
import logging
import os
import subprocess
from typing import List, Dict

import pandas as pd


In [2]:
def read_file(base_directory: str) -> pd.DataFrame:
    """
    Traverse a directory and its subdirectories and collect every file whose name ends with
    ".res.c.sanitized.res.c". File names containing "spinfer", "gpt", "deepseek" or "claude" are skipped.
    Files and subdirectories are visited in alphabetical order, so the row order is stable across runs
    on the same directory tree.

    Parameters:
    - base_directory (str): The base directory path from which to start the search.

    Returns:
    - pd.DataFrame: One row per matching file with the columns
      - "filepath": the file path with the trailing ".res.c.sanitized.res.c" removed,
      - "directory": "treewide" when the containing directory path contains "treewide", otherwise the
        last component of that path (an empty string when the path ends with a separator, e.g. "./"),
      - "is_cocci": whether "<filepath>.cocci" exists as a regular file.
      The three columns are always present, even when no file matches.
    """
    suffix = ".res.c.sanitized.res.c"
    excluded_markers = ("spinfer", "gpt", "deepseek", "claude")
    columns = ["filepath", "directory", "is_cocci"]

    out: List[Dict[str, object]] = []  # One dictionary per matching file.
    for root, dirs, files in os.walk(base_directory):
        files.sort()  # Sort files alphabetically within each directory for consistent processing order.
        dirs.sort()   # Sorting in place also fixes the order in which os.walk descends.
        for filename in files:
            if not filename.endswith(suffix):
                continue
            if any(marker in filename for marker in excluded_markers):
                continue
            # Strip only the trailing suffix; splitting the whole path on the suffix would cut
            # the path short if a parent directory name happened to contain it.
            filepath = os.path.join(root, filename[: -len(suffix)])
            out.append({
                "filepath": filepath,
                "directory": "treewide" if "treewide" in root else os.path.basename(root),
                "is_cocci": os.path.isfile(filepath + ".cocci"),
            })
    # Passing the column list keeps the schema intact for an empty result, so callers can
    # index df["directory"] without a KeyError.
    return pd.DataFrame(out, columns=columns)


def apply_cocci(cocci_filepath: str, c_before_filepath: str, c_after_result_filepath: str, debug_filepath: str) -> None:
    '''
    Runs the "spatch" executable with the semantic patch cocci_filepath on the C file c_before_filepath,
    passing c_after_result_filepath as the "-o" target.

    debug_filepath is opened for writing (truncating any previous content) and receives both the
    standard output and the standard error of the process. A non-zero exit status is logged as an
    error and not propagated; a successful run is logged at INFO level.
    '''
    # Build the argument vector step by step; it is handed to subprocess as a list, so no shell is involved.
    spatch_args = ["spatch"]
    spatch_args += ["--sp-file", cocci_filepath]
    spatch_args += [c_before_filepath, "-o", c_after_result_filepath]
    spatch_args += ["--debug", "--timeout", "30"]

    with open(debug_filepath, "w") as debug_sink:
        try:
            # check=True turns a non-zero exit status into CalledProcessError.
            subprocess.run(spatch_args, stdout=debug_sink, stderr=debug_sink, check=True)
        except subprocess.CalledProcessError as e:
            logging.error(f"Error occurred: {e}")
        else:
            logging.info(f"{' '.join(spatch_args)} is executed!")

In [3]:
DIRECTORY = "./"
APPROACH = ["gpt4", "claude", "deepseek-chat", "spinfer"]

# NOTE(review): the logging.info calls below are only emitted if logging has been configured
# at INFO level; with Python's default root logger level (WARNING) only errors are shown.

# Scan the tree once: the loop below only creates "*.<approach>.res.c" and "*.res.txt" files,
# none of which match the ".res.c.sanitized.res.c" pattern read_file looks for, so the result
# is the same for every approach.
df = read_file(DIRECTORY)
logging.info(f"df shape: {df.shape}")

# An empty scan may yield a frame without a "directory" column; treat that as "nothing to do"
# instead of failing with a KeyError.
directories = df["directory"].unique() if "directory" in df.columns else []

for approach in APPROACH:
    cocci_suffix = f".{approach}.cocci"

    for d in directories:
        logging.info(f"Processing directory: {d}")

        directory_mask = df["directory"] == d
        temp_df = df[directory_mask]

        for _, row in temp_df.iterrows():
            logging.info(f"Processing file: {row['filepath']}")

            # Semantic patches are looked up next to the C file being processed.
            current_d = os.path.dirname(row["filepath"])

            # Every "*.<approach>.cocci" file in that directory, in alphabetical order.
            cocci_filepaths = sorted(
                os.path.join(current_d, item)
                for item in os.listdir(current_d)
                if item.endswith(cocci_suffix)
            )

            for cocci_filepath in cocci_filepaths:
                if os.path.exists(cocci_filepath):
                    logging.info(f"Cocci filepath is found: {cocci_filepath}")

                    # Tag for the output names: the text after the last "_" in the part of the
                    # patch file name before its first ".", e.g. "sp_out.final.gpt4.cocci" -> "out".
                    pred = os.path.basename(cocci_filepath).split(".")[0].split("_")[-1]

                    c_init = row["filepath"] + ".c.sanitized.c"
                    c_final = row["filepath"] + f".{pred}.{approach}.res.c"
                    debug = row["filepath"] + f".{pred}.{approach}.res.txt"
                    apply_cocci(cocci_filepath, c_init, c_final, debug)

                else:
                    # Reached when a listed entry does not resolve to an existing path.
                    logging.info(f"Cocci filepath not found: {cocci_filepath}")

ERROR:root:Error occurred: Command '['spatch', '--sp-file', './dasd_smalloc/sp_out.final.gpt4.cocci', './dasd_smalloc/1528132059_2018-06-04_c5205f2ff2be_dasd_dasd_generic_build_rdc.c.sanitized.c', '-o', './dasd_smalloc/1528132059_2018-06-04_c5205f2ff2be_dasd_dasd_generic_build_rdc.out.gpt4.res.c', '--debug', '--timeout', '30']' returned non-zero exit status 255.
ERROR:root:Error occurred: Command '['spatch', '--sp-file', './dasd_smalloc/sp_out.final.gpt4.cocci', './dasd_smalloc/1528132059_2018-06-04_c5205f2ff2be_dasd_eckd_dasd_eckd_build_check.c.sanitized.c', '-o', './dasd_smalloc/1528132059_2018-06-04_c5205f2ff2be_dasd_eckd_dasd_eckd_build_check.out.gpt4.res.c', '--debug', '--timeout', '30']' returned non-zero exit status 255.
ERROR:root:Error occurred: Command '['spatch', '--sp-file', './dasd_smalloc/sp_out.final.gpt4.cocci', './dasd_smalloc/1528132059_2018-06-04_c5205f2ff2be_dasd_eckd_dasd_eckd_build_check_tcw.c.sanitized.c', '-o', './dasd_smalloc/1528132059_2018-06-04_c5205f2ff2be_