# Generating refactored corpus
Modify the synthetic benchmarks to generate a corpus of refactored programs.
Generate 1 refactored program for each benchmark version.

In [6]:
import import_ipynb
import importlib
from transformations import *
# importlib.reload(transformations)

In [7]:
from pathlib import Path
import difflib
import random
# Seed the global RNG so the random choices made later in this notebook
# are repeatable from run to run.
random.seed(0)

In [8]:

import random
import traceback
import copy
from transformations import *

def c2c(project, c_filename, transforms, picker, num_iterations):
    """Do C source-to-source translation.

    Randomly picks transforms and applies them one at a time until
    ``num_iterations`` of them have succeeded or no candidates remain. Each
    candidate is tried at most once, whether it succeeds or fails. After every
    successful transform the tree is written to ``<stem>.new.xml``, converted
    to ``<stem>.new.c`` and re-read from that file, and the names of the
    transforms applied so far are rewritten to ``<stem>.transforms.txt``.

    Args:
        project: Project identifier; ``str(project)`` is handed to every
            transform in the ``info`` dict under the key ``"project"``.
        c_filename: ``Path`` of the C file to transform. The output files are
            created next to it.
        transforms: Candidate transform callables, each invoked as
            ``t(xml, picker=picker, info=info)``; the return value becomes the
            new tree. The list is copied, so the caller's list is untouched.
        picker: Passed through unchanged to every transform.
        num_iterations: Number of successful transforms to aim for.

    Returns:
        The path ``<stem>.new.c``. It is only written by this call when at
        least one transform succeeds.
    """
    transforms = copy.deepcopy(transforms)
    xml = srcml(c_filename)
    info = {"project": str(project)}
    parent, stem = c_filename.parent, c_filename.stem
    mod_filename = parent / (stem + '.new.c')
    dst_filename = parent / (stem + '.new.xml')
    transform_filename = parent / (stem + '.transforms.txt')

    transforms_applied = []

    def log_transforms_applied():
        """Rewrite the log file with the name of every transform applied so far."""
        with open(transform_filename, 'w') as f:
            for applied in transforms_applied:
                f.write(f'{applied.__name__}\n')

    # Apply num_iterations transforms, chosen randomly
    i = 0
    while i < num_iterations:
        if not transforms:
            print('Quitting early, ran out of transforms')
            break
        t = random.choice(transforms)
        # Remove the candidate exactly once, before trying it, so that neither
        # the success path nor the failure path has to remove it again.
        transforms.remove(t)
        try:
            candidate = t(xml, picker=picker, info=info)
            et.ElementTree(candidate).write(str(dst_filename))
            modified_c = srcml(dst_filename)
            with open(mod_filename, 'w') as f:
                f.write(modified_c)
            # Re-read the generated C file so the next transform works on the
            # same tree that was written to disk.
            candidate = srcml(mod_filename)
        except Exception as e:
            # Best effort: report the failure and move on to another transform.
            # NOTE(review): if a transform mutates the tree in place before
            # failing, `xml` may still carry partial changes -- confirm against
            # the transformations module.
            print(f'Error applying {t.__name__}: {e}')
            continue
        # Commit only after the whole round trip succeeded, so a failed write
        # or conversion does not count as an applied transform.
        xml = candidate
        i += 1
        print('Applied', t.__name__)
        transforms_applied.append(t)
        log_transforms_applied()  # NOTE: Log after each transform, for debugging in case the procedure is errored.
    return mod_filename

def reformat(old_c_file, new_c_file):
    """Run clang-format on only the lines that were added in ``new_c_file``.

    The two files are diffed line by line; every run of consecutive added
    lines (numbered as in ``new_c_file``) becomes one ``--lines=first:last``
    argument for clang-format, so lines that were not added are not selected
    for formatting.

    Args:
        old_c_file: ``Path`` of the original C file.
        new_c_file: ``Path`` of the transformed C file.

    Returns:
        ``Path`` of the formatted copy: ``new_c_file`` with ``.new.`` in its
        file name replaced by ``.formatted.``. When the diff contains no added
        lines there is nothing to format and the file is copied verbatim.

    Raises:
        subprocess.CalledProcessError: If clang-format exits with an error.
    """
    import difflib
    import subprocess

    with old_c_file.open() as f:
        old_lines = f.readlines()
    with new_c_file.open() as f:
        new_lines = f.readlines()

    # Collect [first, last] ranges of consecutive added lines. `lineno` counts
    # lines of the new file, i.e. unchanged (' ') and added ('+') diff lines.
    ranges = []
    lineno = 0
    for line in difflib.Differ().compare(old_lines, new_lines):
        tag = line[0]
        if tag in (' ', '+'):
            lineno += 1
        if tag == '+':
            if ranges and ranges[-1][1] == lineno - 1:
                ranges[-1][1] = lineno
            else:
                ranges.append([lineno, lineno])

    # Rename within the file name only, so directory names are left alone.
    formatted_c_file = new_c_file.with_name(
        new_c_file.name.replace(".new.", ".formatted.")
    )

    if not ranges:
        # No added lines: running clang-format without --lines would reformat
        # the whole file, so just copy the input.
        formatted_c_file.write_bytes(new_c_file.read_bytes())
        return formatted_c_file

    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are handled correctly.
    cmd = ['clang-format']
    cmd.extend(f'--lines={first}:{last}' for first, last in ranges)
    cmd.append('-style={BasedOnStyle: llvm, IndentWidth: 4}')
    cmd.append(str(new_c_file))
    # Capture the output before writing it, so the input is fully read even if
    # the output path happens to equal the input path.
    result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
    formatted_c_file.write_bytes(result.stdout)
    return formatted_c_file

In [9]:
def run(project, file, transforms, num_iterations, picker):
    """Transform one C file and reformat the result.

    Runs ``c2c`` on ``file`` and then ``reformat`` on its output.

    Args:
        project: Forwarded to ``c2c``.
        file: Path (or path string) of the C file to transform.
        transforms: Candidate transforms, forwarded to ``c2c``.
        num_iterations: Forwarded to ``c2c``.
        picker: Forwarded to ``c2c``.

    Returns:
        ``Path`` of the file produced by ``reformat``.
    """
    source_path = Path(file)
    transformed_path = c2c(project, source_path, transforms, picker, num_iterations)
    formatted_path = reformat(source_path, transformed_path)
    return Path(formatted_path)

## Test the code
This is an example of how to use c2clib.

In [10]:
# Smoke test: apply each transform on its own, once, to the testbed program
# and print the unified diff between the original and the result.
project = Path('tests/testbed2')
c_file = project / 'testbed2.c'
transforms = [insert_noop, switch_exchange, loop_exchange, rename_variable, permute_stmt]
for t in transforms:
    new_c_file = run('tests/testbed2', c_file, [t], 1, random.choice)
    with c_file.open() as before, new_c_file.open() as after:
        diff = list(difflib.unified_diff(before.readlines(), after.readlines()))
    print(''.join(diff))

NameError: name 'transformations' is not defined

## Begin experiment

Subject benchmarks:
- ABM
- C Test Suite

Hyperparameters:
- random seed
- num_iterations: Number of transformations to apply to each program
- transforms: The list of candidate transformations to choose from


In [None]:
import itertools

def picker(collection):
    """Return a random element of ``collection``.

    Fails with an ``AssertionError`` ('Collection is empty') when there is
    nothing to choose from.
    """
    size = len(collection)
    assert size > 0, 'Collection is empty'
    chosen = random.choice(collection)
    return chosen
# Hyperparameter: number of transformations to do per program
# (forwarded to run() by run_exp below).
num_iterations = 3
# Hyperparameter: candidate transforms forwarded to run() by run_exp below.
# These names are not defined in this notebook; presumably they come from the
# star-import of `transformations` -- TODO confirm.
transforms = [
    insert_noop,
    switch_exchange,
    loop_exchange,
    rename_variable,
    permute_stmt,
]
def run_exp(project, c_file):
    """Run one experiment on ``c_file`` using the notebook-level settings.

    Calls ``run`` with the module-level ``transforms``, ``num_iterations`` and
    ``picker`` and returns whatever ``run`` returns.
    """
    new_c_file = run(
        project,
        c_file,
        transforms=transforms,
        num_iterations=num_iterations,
        picker=picker,
    )
    return new_c_file
    # Disabled cleanup of intermediate files, kept for reference:
    # for f in itertools.chain(project.glob('*.new.c'), project.glob('*.*.xml'), project.glob('*.c.diff')):
    #     f.unlink()

In [None]:
# import itertools
# abm = Path('tests/abm').glob('*')
# ctestsuite = Path('tests/ctestsuite').glob('*')
# zitser = Path('tests/zitser').glob('*')
# all = itertools.chain(abm, ctestsuite, zitser)
# all_projects = itertools.chain(abm)

tests = Path('tests')
# This file contains all the buggy versions from the synthetic benchmarks with the format "project-version".
# One sample per line.
with open('synthetic-samples.txt') as f:
    samples = f.read().splitlines()
# Skip blank lines (e.g. a trailing empty line) so the "project-version"
# unpacking below does not fail on them.
samples = [s.split('-') for s in samples if s.strip()]
all_projects = [tests / p / v for p, v in samples]

for project in all_projects:
    print(project)
    assert project.exists()
    # Ignore outputs of earlier runs (*.new.c, *.formatted.c).
    c_files = [
        c for c in project.glob('*.c')
        if not c.name.endswith(('.formatted.c', '.new.c'))
    ]
    assert len(c_files) >= 1, f'No C files found in {project}'

    if len(c_files) == 1:
        c_file = c_files[0]
    else:
        # Several candidates: use the one carrying a bug marker comment (the
        # last such file wins if there are several). Reset c_file first so a
        # project without any marked file cannot silently reuse the file
        # selected for the previous project.
        c_file = None
        for fpath in c_files:
            with fpath.open() as f:
                text = f.read()
            if '/* BAD */' in text or '/* FLAW */' in text:
                c_file = fpath
        assert c_file is not None, f'No C file marked /* BAD */ or /* FLAW */ in {project}'
    new_c_file = run_exp(project, c_file)
    print(c_file, new_c_file)
    # Save the unified diff between the original and the refactored file
    # next to the original.
    with c_file.open() as old_f, new_c_file.open() as new_f:
        diff = list(difflib.unified_diff(old_f.readlines(), new_f.readlines()))
    with open(str(c_file) + '.diff', 'w') as f:
        f.write(''.join(diff))

tests/abm/550
Applied rename_variable
Error applying loop_exchange: Collection is empty
Error applying switch_exchange: Collection is empty
Error applying permute_stmt: Collection is empty
Applied insert_noop
Quitting early, ran out of transforms
tests/abm/550/chroot1.c tests/abm/550/chroot1.formatted.c
tests/abm/557
Error applying switch_exchange: Collection is empty
Applied rename_variable
Applied insert_noop
Error applying permute_stmt: Collection is empty
Error applying loop_exchange: Collection is empty
Quitting early, ran out of transforms
tests/abm/557/fmt2.c tests/abm/557/fmt2.formatted.c
tests/abm/575
Error applying loop_exchange: argument of type 'NoneType' is not iterable
Applied insert_noop
Applied rename_variable
Error applying switch_exchange: Collection is empty
Applied permute_stmt
tests/abm/575/into3.c tests/abm/575/into3.formatted.c
tests/abm/577
Error applying loop_exchange: argument of type 'NoneType' is not iterable
Applied permute_stmt
Applied rename_variable
Appl

KeyboardInterrupt: 