In [5]:
from datasets import load_dataset

In [6]:
dataset = load_dataset("Rtian/DebugBench")

In [7]:
import difflib

def remove_blank_lines(text):
    return "\n".join([s for s in text.splitlines() if s])

def custom_diff(file1_contents, file2_contents):
    diff = difflib.ndiff(file1_contents.splitlines(), file2_contents.splitlines())
    
    old_lines = []

    current_line_number = 1
    for line in diff:        
        if line.startswith('  '):
            current_line_number += 1
            continue
        elif line.startswith('- '):  # Lines in file1 but not in file2
            old_lines.append([current_line_number, line[2:]])
            current_line_number += 1        
    return old_lines

solution = """def sum(a, b):
                sum = 0
                return sumum
            """

buggy_code = """def sum(a, b):
                sum = 3
                zzzz
                return sum
            """

#old = custom_diff(buggy_code, solution)
#print(old)

In [8]:
import subprocess

def format_cpp_code(code):
    try:
        process = subprocess.Popen(['clang-format', '-style=file:./clang-format.txt'], 
                                   stdin=subprocess.PIPE, 
                                   stdout=subprocess.PIPE, 
                                   stderr=subprocess.PIPE)
        formatted_code, errors = process.communicate(input=code.encode())
        
        if process.returncode != 0:
            print("Error formatting code: ", errors.decode())
            return None
        
        return formatted_code.decode()
    except Exception as e:
        print(f"An exception occurred: {e}")
        return None


In [10]:
import requests as req
import pandas as pd

def process_entry(entry):
    buggy_code = format_cpp_code(remove_blank_lines(entry['buggy_code']))
    solution = format_cpp_code(remove_blank_lines(entry['solution']))
    diff = custom_diff(buggy_code, solution)

    data = {"prompt": buggy_code}
    resp = req.post("http://delos.eaalab.hpi.uni-potsdam.de:8010/highlight-code/", json=data)
    response_json = resp.json()

    line_numbers = []
    for item in response_json:
        line_number = int(item['line_number'])
        line_numbers.append(line_number)

    return buggy_code, solution, diff, line_numbers

df = pd.DataFrame(dataset['test'])

filtered_df = df[(df['language'] == 'cpp') & (df['category'] == 'logic error')]

filtered_df = filtered_df.head(20)

results = filtered_df.apply(process_entry, axis=1)

buggy_code_list, solutions, diffs, line_numbers_list = zip(*results)

print("Total number of cpp logic error examples: ", len(filtered_df))


Total number of cpp logic error examples:  20


In [15]:
for i in range(len(line_numbers_list)):
    print("Example: ", i)
    print(line_numbers_list[i])
    print([line[0] for line in diffs[i]])

Example:  0
[5, 13, 23]
[5]
Example:  1
[21]
[17, 18]
Example:  2
[18, 21, 27]
[24]
Example:  3
[8, 27]
[27]
Example:  4
[7, 11]
[8]
Example:  5
[15, 26]
[6, 9, 12]
Example:  6
[8, 10, 11]
[11]
Example:  7
[12, 21, 45, 37]
[12]
Example:  8
[8, 5, 13, 20]
[16]
Example:  9
[-1, -1]
[18]
Example:  10
[]
[19, 22]
Example:  11
[]
[16]
Example:  12
[-1, -1, 20]
[20]
Example:  13
[-1, -1]
[16, 17]
Example:  14
[-1, -1, -1]
[15]
Example:  15
[-1, 11]
[8, 9]
Example:  16
[]
[16, 33]
Example:  17
[4, 9, 18]
[14]
Example:  18
[8, 8, 8]
[8]
Example:  19
[-1, 20, 22, 24]
[10, 12, 21, 25]


In [21]:
# print 9th column form dataframe
print(buggy_code_list[9])
print(solutions[9])
print(diffs[9])

class Solution {
   public:
    long long beautifulSubarrays(vector<int>& nums) {
        int n = nums.size();

        long long ans = 0;

        // declare an unordered map

        unordered_map<int, int> mp;

        mp[0] = 1;

        int curr_xorr = 0;

        // traverse over the nums

        for (int i = 0; i <= n; i++) {
            // find xorr

            curr_xorr = (curr_xorr ^ nums[i]);

            // if curr_xorr exit in map then update ans

            if (mp.count(curr_xorr)) {
                ans += mp[curr_xorr];
            }

            // increment the freq of curr_xorr

            mp[curr_xorr]++;
        }

        return ans;
    }
};
class Solution {
   public:
    long long beautifulSubarrays(vector<int>& nums) {
        int n = nums.size();

        long long ans = 0;

        // declare an unordered map

        unordered_map<int, int> mp;

        // insert 0 with the frequncy of 1

        mp[0] = 1;

        int curr_xorr = 0;

        // travers