Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
githubhuyang committed Sep 16, 2019
1 parent e28acd5 commit 9941a8a
Show file tree
Hide file tree
Showing 27 changed files with 6,778 additions and 0 deletions.
62 changes: 62 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,65 @@ Refactory is a fully automated approach for generating student program repairs i

## Authors
Yang Hu, Umair Z. Ahmed, Sergey Mechtaev, Ben Leong, Abhik Roychoudhury

If you use Refactory in your research project, please include the following citation:
@inproceedings{yang2019refactory,
title={Re-factoring based Program Repair applied to Programming Assignments},
author={Hu, Yang and Ahmed, Umair Z. and Mechtaev, Sergey and Leong, Ben and Roychoudhury, Abhik},
booktitle={The 34th IEEE/ACM International Conference on Automated Software Engineering (ASE 2019)},
year={2019},
organization={IEEE/ACM}
}

## Principal Investigator
Abhik Roychoudhury

## Developers
Yang Hu, Umair Z. Ahmed

## Usage
### Data Format
The data directory contains all inputs of Refactory, including the test suite, student submissions (correct or buggy), and reference solutions. Please arrange those items in the following directory tree structure.
```
|-data
|-question_xxx
| |-ans
| | |-input_xxx.txt
| | |-output_xxx.txt
| | |-...
| |
| |-code
| |-correct
| | |-sub_xxxxxxx.py
| | |-...
| |
| |-wrong
| |-sub_xxxxxxx.py
| |-...
|
|
|-...
```
`ans` directory contains the whole test-suite, which is arranged as a sequence of input-output file pairs. `code/correct` directory contains all correct (i.e., pass all test cases) submissions, while `code/wrong` contains incorrect ones. `global.py`

### Run Refactory
#### Experimental Environment Setup
Refactory is implemented in Python 3.7. Although there are various Python distributions available online, we recommend using Anaconda 3, which comes with commonly used Python software packages preinstalled. Then use `pip` to install the remaining software packages into Anaconda.

`pip install psutil zss autopep8 python-Levenshtein astunparse prettytable apted fastcache`

Instead of manually deploying the experimental environment, you can set up the environment in Docker by building a Docker image based on `docker/Dockerfile`.

`sudo docker build -t refactory ./docker/`

#### Refactory's CLI

##### General Flags
Refactory has a command line interface in `run.py`. In general, you need to declare the path of the data directory via `-d`, the question name via `-q`, and a list of sampling rates via `-s`. In addition, add `-o` to enable online refactoring, `-f` to enable offline refactoring, `-b` to enable block repair, and `-m` to enable structure mutation. Please note that Refactory currently does not support enabling online and offline refactoring at the same time. For example, consider the following command.

`python run.py -d ./data -q question_123 -s 100 -o -m -b`

Those flags indicate that you want to run Refactory with a 100% sampling rate, online refactoring, structure mutation, and block repair to fix all buggy programs in question_123 in the `./data` directory.

##### Flag for Log Generation
By default, Refactory logs repairs, time consumption, relative patch size, etc. into a csv file for each question. The csv file name is refactory_*.csv, where * is 'online', 'offline', or 'norefactor'. To combine logs for different questions, you can use the `-c` flag to ask Refactory to combine all csv files generated under the same setting into one csv file.
Empty file added basic_framework/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions basic_framework/block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Developers: Yang Hu, et al.
# Email: huyang0905@gmail.com

from basic_framework.statement import *


def is_pass_block(bb):
    """Return True when ``bb`` is a single-line block containing a ``pass`` token.

    Args:
        bb: Source text of one basic block. Blocks normally end with a
            newline, but a missing trailing newline is tolerated.

    Returns:
        True if the block consists of exactly one line and one of the tokens
        produced by ``get_token_list`` for it has the string ``"pass"``;
        False otherwise (including for the empty string).
    """
    if len(bb) == 0:
        return False

    # Ignore at most one trailing newline so that "pass" and "pass\n" are
    # treated alike, and so that an unterminated two-line block is not
    # miscounted as a single line.
    body = bb[:-1] if bb.endswith("\n") else bb
    if "\n" in body:
        return False

    return any(token.string == "pass" for token in get_token_list(bb))


def is_empty_block(bb):
    """Tell whether a basic block carries no real statement.

    A block counts as empty when its text has length zero or when
    ``is_pass_block`` accepts it.
    """
    if len(bb) == 0:
        return True
    return is_pass_block(bb)
276 changes: 276 additions & 0 deletions basic_framework/cfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
# Developers: Yang Hu, et al.
# Email: huyang0905@gmail.com

from basic_framework.utils import regularize, rm_bb_indent, resume_bb_indent
from basic_framework.statement import *
from fastcache import clru_cache
import random
import copy
import sys


@clru_cache(maxsize=1024)
def get_cfs_map(code):
    """Map every function name in ``code`` to its control-flow structure.

    Each value is the ``(bb_list, stru_list, indent_list)`` triple that
    ``get_func_cfs`` produces for the function text returned by
    ``get_func_map``.

    The result is memoised per ``code`` string by ``clru_cache``, so the very
    same dict and lists are handed back on repeated calls; callers should
    treat them as read-only.
    """
    return {
        name: tuple(get_func_cfs(func_src))
        for name, func_src in get_func_map(code).items()
    }


def cfs_map_equal(cfs_map_a, cfs_map_b):
    """Compare two control-flow-structure maps, ignoring block contents.

    The maps are equal when they cover the same function names and, for each
    function, both the structure list and the indent list match. The block
    text (first element of each triple) is not compared.
    """
    if set(cfs_map_a) != set(cfs_map_b):
        return False

    for name, (_, stru_a, indent_a) in cfs_map_a.items():
        _, stru_b, indent_b = cfs_map_b[name]
        if not (stru_a == stru_b and indent_a == indent_b):
            return False
    return True


def get_func_map(code):
    """Collect the source text of the function definitions found in ``code``.

    Args:
        code: Python source text; it is parsed with ``ast.parse``.

    Returns:
        dict mapping each visited function's name to
        ``regularize(astunparse.unparse(node))`` for its ``FunctionDef`` node.
        When several visited functions share a name, the last one wins.

    Raises:
        SyntaxError: if ``code`` cannot be parsed.
    """

    class FuncVisitor(ast.NodeVisitor):
        def __init__(self):
            # A bare ``super()`` only builds a proxy object; the base
            # initializer has to be called explicitly.
            super().__init__()
            self.func_map = {}

        def visit_FunctionDef(self, node):
            # No generic_visit() here, so the visitor does not descend into a
            # function body: nested defs stay part of their parent's text and
            # get no entry of their own.
            self.func_map[node.name] = regularize(astunparse.unparse(node))

        def run(self, code):
            self.visit(ast.parse(code))
            return self.func_map

    return FuncVisitor().run(code)


def get_func_cfs(code):
# Scan the text of one function line by line and split it into a flat
# sequence of blocks. Three parallel lists are built and returned:
#   block_list  - the text of each block (may be the empty string),
#   stru_list   - a tag per block: 'sig', 'if', 'elif', 'else', 'for',
#                 'while', or 'bb' for a run of ordinary statements,
#   indent_list - an indentation value recorded for each block.
# The final assert checks that the three lists have the same length.
#
# NOTE(review): the leading whitespace of this listing has been lost, so the
# nesting of the statements below cannot be read off the text; confirm the
# structure against the repository before restructuring anything here.
# NOTE(review): the is_*_stat / is_method_sign / get_indent helpers are not
# defined in this file - presumably they come from basic_framework.statement
# via the star import; verify.
line_list = code.split("\n")
block_list = []
stru_list = []
indent_list = []
# Text of the ordinary-statement block that is currently being accumulated.
block_code = ""

# Indentation at which the next ordinary statement is expected. It moves in
# steps of 4 - presumably the input has been normalised to 4-space indents
# (TODO confirm against `regularize`).
curr_ind = 0
for line in line_list:
# Lines containing the "empty_hole" marker and empty lines are skipped.
if "empty_hole" in line or len(line) == 0:
continue
if is_method_sign(line):
# Signature line: becomes its own 'sig' block; the body is expected one
# level (4) deeper.
block_code = ""
block_list.append(line + "\n")
stru_list.append('sig')
indent_list.append(curr_ind)
curr_ind += 4
elif is_if_stat(line):
# The pending statement block is always emitted first, even when empty.
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(curr_ind)

# Emit empty 'bb' placeholders for the indentation gap between the
# expected indent and the indent of this header line.
line_ind = get_indent(line)
if curr_ind < line_ind:
block_list.append("")
stru_list.append("bb")
indent_list.append(line_ind)
elif curr_ind > line_ind:
for k in range(curr_ind - 4, line_ind - 4, -4):
block_list.append("")
stru_list.append("bb")
indent_list.append(k)

# The header line itself becomes an 'if' block; its body is expected at
# line_ind + 4.
block_code = ""
block_list.append(line + "\n")
stru_list.append('if')
indent_list.append(line_ind)
curr_ind = line_ind + 4
elif is_elif_stat(line):
# Same bookkeeping as for 'if', except that placeholders are only emitted
# when dedenting, and the range stops at line_ind instead of line_ind - 4.
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(curr_ind)

line_ind = get_indent(line)
if curr_ind > line_ind:
for k in range(curr_ind - 4, line_ind, -4):
block_list.append("")
stru_list.append("bb")
indent_list.append(k)

block_code = ""
block_list.append(line + "\n")
stru_list.append('elif')
indent_list.append(get_indent(line))
curr_ind = get_indent(line) + 4
elif is_else_stat(line):
# Handled exactly like 'elif', with the 'else' tag.
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(curr_ind)

line_ind = get_indent(line)
if curr_ind > line_ind:
for k in range(curr_ind - 4, line_ind, -4):
block_list.append("")
stru_list.append("bb")
indent_list.append(k)

block_code = ""
block_list.append(line + "\n")
stru_list.append('else')
indent_list.append(get_indent(line))
curr_ind = get_indent(line) + 4
elif is_for_loop_stat(line):
# Handled exactly like 'if', with the 'for' tag.
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(curr_ind)

line_ind = get_indent(line)
if curr_ind < line_ind:
block_list.append("")
stru_list.append("bb")
indent_list.append(line_ind)
elif curr_ind > line_ind:
for k in range(curr_ind - 4, line_ind - 4, -4):
block_list.append("")
stru_list.append("bb")
indent_list.append(k)

block_code = ""
block_list.append(line + "\n")
stru_list.append('for')
indent_list.append(line_ind)
curr_ind = line_ind + 4
elif is_while_loop_stat(line):
# Handled exactly like 'if', with the 'while' tag.
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(curr_ind)

line_ind = get_indent(line)
if curr_ind < line_ind:
block_list.append("")
stru_list.append("bb")
indent_list.append(line_ind)
elif curr_ind > line_ind:
for k in range(curr_ind - 4, line_ind - 4, -4):
block_list.append("")
stru_list.append("bb")
indent_list.append(k)

block_code = ""
block_list.append(line + "\n")
stru_list.append('while')
indent_list.append(line_ind)
curr_ind = line_ind + 4
else:
# Ordinary statement: extend the pending block when the indent matches,
# otherwise emit 'bb' entries for the indentation levels being crossed and
# start a new pending block at the new indent.
ind = get_indent(line)
if ind == curr_ind:
block_code += line + "\n"
elif ind > curr_ind:
for tmp_ind in range(curr_ind, ind, 4):
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(tmp_ind)
block_code = ""
block_code = line + "\n"
curr_ind = ind
else:
for tmp_ind in range(curr_ind, ind, -4):
block_list.append(block_code)
stru_list.append('bb')
indent_list.append(tmp_ind)
block_code = ""
block_code = line + "\n"
curr_ind = ind
# Flush the last pending block; its indent is taken from its first line.
if len(block_code) > 0:
block_list.append(block_code)
stru_list.append('bb')
curr_ind = get_indent(block_code.split("\n")[0])
indent_list.append(curr_ind)
block_code = ""
# Emit trailing 'bb' entries for the shallower levels 4, 8, ... below
# curr_ind (block_code is reset after each append).
if curr_ind > 4:
for ind in range(curr_ind - 4, 0, -4):
block_list.append(block_code)
block_code = ""
stru_list.append('bb')
indent_list.append(ind)

# The three lists are parallel: one entry per block.
assert (len(block_list) == len(stru_list) and
len(block_list) == len(indent_list))
return block_list, stru_list, indent_list


def cfs_mutation(bug_code, corr_code):
    """Rewrite the functions of ``bug_code`` to follow the block structure of ``corr_code``.

    For every function found in ``corr_code``:

    * If ``bug_code`` has no function of that name, the correct function's
      blocks are used verbatim and the name is added to the returned list of
      missing functions.
    * Otherwise, when the structure lists or indent lists differ, the edit
      operations reported by ``Levenshtein.editops`` between the two
      structure strings (built with ``cpr_stru_list``) are applied to the
      buggy block list: replaced and inserted blocks are taken from the
      correct code, deleted blocks are dropped. Every resulting block is then
      passed through ``rm_bb_indent`` / ``resume_bb_indent`` with the indent
      recorded for the correct function, and a block that ends up empty is
      replaced by ``pass``.

    Functions that exist only in ``bug_code`` are not part of the result.
    Inserted blocks are chosen with ``random.sample`` among the correct
    blocks that share the structure tag, so the output varies between calls
    unless ``random`` is seeded.

    Args:
        bug_code: Source text of the buggy program.
        corr_code: Source text of the correct (reference) program.

    Returns:
        A ``(code, lose_func_list)`` tuple: the rewritten function texts
        joined by blank lines, and the names of the functions that were
        missing from ``bug_code``.

    Raises:
        AssertionError: if, after applying the edit operations, the number of
            blocks differs from the number of blocks of the correct function.
    """
    bug_cfs_map = get_cfs_map(bug_code)
    corr_cfs_map = get_cfs_map(corr_code)

    print("structure mutation")

    rev_bug_func_map = {}
    lose_func_list = []

    # Kept as a function-level import, as in the original code (presumably to
    # avoid an import cycle with basic_framework.distance - TODO confirm).
    from basic_framework.distance import cpr_stru_list

    for func_name in corr_cfs_map.keys():
        corr_bb_list, corr_stru_list, corr_indent_list = corr_cfs_map[func_name]
        corr_stru_str = cpr_stru_list(corr_stru_list)

        if func_name not in bug_cfs_map:
            rev_bug_func_map[func_name] = "".join(corr_bb_list)
            lose_func_list.append(func_name)
            continue

        bug_bb_list, bug_stru_list, bug_indent_list = bug_cfs_map[func_name]
        # get_cfs_map() is memoised, so this list is shared with every other
        # caller asking for the same code. Work on a copy: the "replace" edit
        # below assigns into the list and would otherwise corrupt the cache.
        bug_bb_list = list(bug_bb_list)

        edit_list = []
        if bug_stru_list != corr_stru_list or \
                bug_indent_list != corr_indent_list:
            bug_stru_str = cpr_stru_list(bug_stru_list)
            import Levenshtein
            # NOTE(review): the edit-op indices are used directly as list
            # indices below, which assumes cpr_stru_list() emits exactly one
            # character per structure entry - confirm.
            edit_list = Levenshtein.editops(bug_stru_str, corr_stru_str)

        insert_map = {}  # source index -> blocks to insert before it
        del_set = set()  # source indices whose block is dropped
        for edit_op, src_idx, dst_idx in edit_list:
            if edit_op == "replace":
                bug_bb_list[src_idx] = corr_bb_list[dst_idx]
            elif edit_op == "insert":
                if src_idx not in insert_map:
                    insert_map[src_idx] = []

                # Candidates: every correct block with the same structure tag
                # as the block at the destination position.
                wanted_stru = corr_stru_list[dst_idx]
                flt_corr_bb_list = [
                    corr_bb
                    for corr_bb, corr_stru in zip(corr_bb_list, corr_stru_list)
                    if corr_stru == wanted_stru
                ]

                assert(len(flt_corr_bb_list) > 0)
                sel_corr_bb = random.sample(flt_corr_bb_list, 1)[0]

                insert_map[src_idx].append(sel_corr_bb)
            elif edit_op == "delete":
                del_set.add(src_idx)

        new_bug_bb_list = []
        for idx, bug_bb in enumerate(bug_bb_list):
            if idx in insert_map:
                new_bug_bb_list.extend(insert_map[idx])
            if idx not in del_set:
                new_bug_bb_list.append(bug_bb)

        # Insertions positioned past the last buggy block are appended.
        if len(bug_bb_list) in insert_map:
            new_bug_bb_list.extend(insert_map[len(bug_bb_list)])

        assert(len(corr_indent_list) == len(new_bug_bb_list))

        for idx, corr_indent in enumerate(corr_indent_list):
            o_ind_bb, _ = rm_bb_indent(new_bug_bb_list[idx])
            if len(o_ind_bb) == 0:
                # Keep the rewritten function syntactically complete.
                o_ind_bb = "pass\n"

            new_bug_bb_list[idx] = resume_bb_indent(o_ind_bb, corr_indent)

        rev_bug_func_map[func_name] = "".join(new_bug_bb_list)

    return "\n\n".join(list(rev_bug_func_map.values())), lose_func_list
Loading

0 comments on commit 9941a8a

Please sign in to comment.