In [1]:
import re

def _find_closing(source: str, open_index: int, opener: str, closer: str) -> int:
    """
    Returns the index of the `closer` that balances the `opener` found at `open_index`.
    Raises ValueError when the delimiters never balance.
    """
    depth = 0
    for offset in range(open_index, len(source)):
        char = source[offset]
        if char == opener:
            depth += 1
        elif char == closer:
            depth -= 1
            if depth == 0:
                return offset
    raise ValueError(f"unbalanced {opener!r} starting at offset {open_index}")


def extract_if_block(source: str) -> tuple[str, str, str, str]:
    """
    Extracts the first if block from the source code.
    Returns a tuple of the code before the block, the condition, the content of the block, and the code after the block.

    When the source has no `if (` header, returns (source, None, None, None) with the source untouched.
    Raises ValueError when the header has no opening brace or its parentheses/braces are unbalanced.

    Limitation: delimiters are counted textually, so parentheses or braces inside
    string literals or comments are not recognised as such.
    """
    indent_regex = re.compile(r'^\s+', flags=re.MULTILINE)

    # Match `if` as a whole word followed by `(`, so identifiers such as `diff`
    # are never mistaken for the keyword; the same match anchors every slice below.
    header = re.search(r'\bif\s*\(', source)
    if header is None:
        return source, None, None, None

    code_before_block = indent_regex.sub('', source[:header.start()].strip())

    # The condition ends at the parenthesis that balances the opening one,
    # which keeps nested expressions such as `(a + b) > c` intact.
    paren_open = header.end() - 1
    paren_close = _find_closing(source, paren_open, '(', ')')
    condition = source[paren_open + 1:paren_close].strip()

    # The block body runs from the first `{` after the condition to its balancing `}`.
    brace_open = source.index('{', paren_close)
    brace_close = _find_closing(source, brace_open, '{', '}')

    content = indent_regex.sub('', source[brace_open + 1:brace_close].strip())
    all_code_after_block = indent_regex.sub('', source[brace_close + 1:].strip())
    return code_before_block, condition, content, all_code_after_block

In [2]:
# Read the whole of source.c into memory.
with open('source.c', 'r') as f:
    source = f.read()
# Split the text around its first if block; all four results are None-padded
# by extract_if_block when no if block is found.
code_before_block, condition, content, all_code_after_block = extract_if_block(source)
# Print each returned piece under its own banner line.
print("================================ before: ")
print(code_before_block)
print("================================ condition: ")
print(condition)
print("================================ content: ")
print(content)
print("================================ after: ")
print(all_code_after_block)

a = 10;
a < b
a = a + 1; 
b = 12; 
if (a > b) {
a = a + 14; 
}
c = b + 11;
c = c + 5;


In [3]:
class IfNode:
    """
    Represents a node in the if tree.

    A node either holds the condition text of an if block (`condition=True`)
    or a run of plain statements (`condition=False`).
    """
    def __init__(self, data: str, children: list["IfNode"], depth: int, condition: bool=False, root: bool=False):
        """
        data: condition text for an if node, statement text otherwise.
        children: list that will hold the nested nodes; stored by reference, not copied.
        depth: nesting depth assigned by the caller.
        condition: True when the node stands for an if block.
        root: True only for the synthetic top-level node.
        """
        self.data: str = data
        self.children: list[IfNode] = children
        self.condition: bool = condition
        # {variable: modification}; starts empty and is filled in after construction.
        self.variables: dict[str, str] = {}
        self.depth: int = depth
        self.root: bool = root

In [4]:
def build_iftree_helper(parent: IfNode, content: str, depth: int) -> IfNode: 
    """
    Builds the if tree recursively.

    Appends to `parent.children`, in source order: a statement node for the code
    before the first if block, a condition node for the block itself (its body is
    parsed one level deeper), and then the nodes produced from the code after it.
    Always returns `parent`, including when `content` is empty.
    """
    if not content: 
        # Nothing to parse; still hand the parent back so the declared return type holds.
        return parent
    # A separate name for the block body keeps the `content` argument intact.
    code_before_block, condition, block_body, all_code_after_block = extract_if_block(content)
    if code_before_block:
        parent.children.append(IfNode(code_before_block, [], depth))
    # `None` means "no if block found"; an empty condition string is still a block
    # whose body and trailing code must not be dropped.
    if condition is not None: 
        if_node = IfNode(condition, [], depth, True)
        parent.children.append(if_node)
        build_iftree_helper(if_node, block_body, depth + 1)
        build_iftree_helper(parent, all_code_after_block, depth)
    return parent

def build_iftree(content: str) -> IfNode: 
    """
    Builds the if tree from the source code.

    Returns the synthetic `__root__` node (depth 0); nodes parsed from `content`
    are attached beneath it starting at depth 1. The root is returned even when
    `content` is empty.
    """
    root_node = IfNode('__root__', [], 0, True, True)
    # The helper fills the root in place; return the root itself rather than
    # relying on the helper's return value.
    build_iftree_helper(root_node, content, 1)
    return root_node

In [5]:
def print_iftree(node: IfNode, indent: int=-1, childNum: list=None) -> None:
    """
    Prints the if tree, with indentation and child numbers for easier selection. 

    node: subtree to print.
    indent: number of two-space indents for this node; the default of -1 puts the
        root and its direct children at column 0.
    childNum: child indices leading from the root to this node; defaults to an empty path.
    """
    # A None default avoids sharing one list object between calls.
    if childNum is None:
        childNum = []
    pad = '  ' * indent
    if node.condition:
        print(pad + 'if (' + node.data + ') {', '->', 'depth:', node.depth, 'selection:', childNum, 'variables:', node.variables)
    else: 
        for line in node.data.splitlines():
            print(pad + line, '->', 'selection:', childNum, 'variables:', node.variables)
    for idx, child in enumerate(node.children):
        print_iftree(child, indent + 1, childNum + [idx]) 
    if node.condition:
        print(pad + '}')

In [6]:
# Read source.c, build its if tree, and print the tree.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
print_iftree(tree)

if (__root__) { -> depth: 0 selection: [] variables: {}
a = 10; -> selection: [0] variables: {}
if (a < b) { -> depth: 1 selection: [1] variables: {}
  a = a + 1;  -> selection: [1, 0] variables: {}
  b = 12; -> selection: [1, 0] variables: {}
  if (a > b) { -> depth: 2 selection: [1, 1] variables: {}
    a = a + 14; -> selection: [1, 1, 0] variables: {}
  }
  c = b + 11; -> selection: [1, 2] variables: {}
}
c = c + 5; -> selection: [2] variables: {}
}


In [7]:
import re
# Words that are never reported as variables: the names filtered by the original
# implementation plus C's underscore-prefixed keywords, which become matchable
# once identifiers may start with `_`.
_C_RESERVED_WORDS = frozenset({
    'int', 'float', 'double', 'char', 'long', 'short', 'signed', 'unsigned', 'void', 'bool',
    'true', 'false', 'if', 'else', 'while', 'for', 'do', 'switch', 'case', 'break', 'continue',
    'return', 'goto', 'sizeof', 'typedef', 'struct', 'union', 'enum', 'static', 'extern', 'auto',
    'register', 'const', 'volatile', 'inline', 'restrict', 'default', 'asm', 'alignas', 'alignof',
    'atomic', 'noreturn', 'thread_local', 'complex', 'imaginary', 'NULL', 'offsetof',
    'static_assert', 'wchar_t',
    '_Bool', '_Complex', '_Imaginary', '_Alignas', '_Alignof', '_Atomic', '_Generic',
    '_Noreturn', '_Static_assert', '_Thread_local',
})


def extract_vars(code: str) -> list[str]:
    """
    Extracts all variables from a line of code.

    Returns the distinct identifiers found in `code`, sorted, with reserved words
    removed. Identifiers may start with a letter or an underscore.
    Every remaining identifier is reported, so function names count as variables too.
    """
    identifiers = re.findall(r'\b[A-Za-z_]\w*\b', code)
    # A set drops duplicates; sorted() gives the deterministic order callers print.
    return sorted({name for name in identifiers if name not in _C_RESERVED_WORDS})

def extract_vars_from_iftree(node: IfNode) -> list[str]:
    """
    Collects the variables of `node` and of every node below it.

    Each node's text is scanned line by line with extract_vars; the result is
    the sorted list of distinct names found anywhere in the subtree.
    """
    found = set()
    # Explicit work list instead of recursion; visiting order is irrelevant
    # because the result is de-duplicated and sorted at the end.
    pending = [node]
    while pending:
        current = pending.pop()
        for text_line in current.data.splitlines():
            found.update(extract_vars(text_line))
        pending.extend(current.children)
    return sorted(found)

In [8]:
# Rebuild the if tree from source.c.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
# `b` is declared twice on purpose to show that duplicates are collapsed.
test_string = 'int b = 1; int a = 2; int b = 3;'
print(extract_vars(test_string))
# Same extraction applied to every node of the tree.
print(extract_vars_from_iftree(tree))

['a', 'b']
['a', 'b', 'c']


In [9]:
def extract_modifications(variable: str, code: str) -> list[str]:
    """
    Extracts all modifications of a variable in a line of code.

    Returns, in order of appearance, the right-hand side of every `variable = ...`
    assignment in `code`, each running up to (not including) the next `;`.
    Comparisons (`variable == ...`) are not assignments and are skipped.
    Compound assignments such as `+=` are not recognised.
    """
    # `=(?!=)` rejects `==`; the capture group returns only the right-hand side,
    # so no second substitution pass over the matched text is needed.
    assignment = re.compile(r'\b' + re.escape(variable) + r'\b\s*=(?!=)\s*([^;]+)')
    return assignment.findall(code)

In [10]:
# Rebuild the if tree from source.c.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
# Assignments to `a` in the root's first child ...
print(extract_modifications('a', tree.children[0].data))
# ... and in the first child of the root's second child.
print(extract_modifications('a', tree.children[1].children[0].data))

['10']
['a + 1']


In [11]:
def gen_var_tree_helper(variable: str, root: IfNode, parent: IfNode, depth: int) -> None: 
    """
    Records on `root` the modification that its own text applies to `variable`.

    Only the first assignment reported by extract_modifications is stored;
    nodes without an assignment to `variable` are left unchanged.
    `parent` and `depth` are accepted but not used.
    """
    if root:
        assignments = extract_modifications(variable, root.data)
        if assignments:
            root.variables[variable] = assignments[0]

def gen_var_tree(variable: str, node: IfNode, parent: IfNode=None, depth: int=0) -> None:
    """
    Updates/mutates the variable property of the node and its children using pre-order traversal.

    variable: name whose modifications are recorded.
    node: subtree to annotate; a falsy node is ignored.
    parent, depth: forwarded to gen_var_tree_helper for each visited node.
    Works purely by side effect and always returns None.
    """
    if not node: 
        return
    # Visit the node itself first, then its children (pre-order).
    gen_var_tree_helper(variable, node, parent, depth)
    for child in node.children:
        gen_var_tree(variable, child, node, depth + 1)

In [12]:
# Rebuild the if tree from source.c.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
# Annotate every node with the modification it applies to each variable,
# then print the annotated tree.
variables = extract_vars_from_iftree(tree)
for variable in variables:
    gen_var_tree(variable, tree)
print_iftree(tree)

if (__root__) { -> depth: 0 selection: [] variables: {}
a = 10; -> selection: [0] variables: {'a': '10'}
if (a < b) { -> depth: 1 selection: [1] variables: {}
  a = a + 1;  -> selection: [1, 0] variables: {'a': 'a + 1', 'b': '12'}
  b = 12; -> selection: [1, 0] variables: {'a': 'a + 1', 'b': '12'}
  if (a > b) { -> depth: 2 selection: [1, 1] variables: {}
    a = a + 14; -> selection: [1, 1, 0] variables: {'a': 'a + 14'}
  }
  c = b + 11; -> selection: [1, 2] variables: {'c': 'b + 11'}
}
c = c + 5; -> selection: [2] variables: {'c': 'c + 5'}
}


In [14]:
def combine_modifications(mutations: list[str], variable: str) -> str: 
    """
    Combines all mutations of a variable into a single string.

    The mutations are applied in order: every whole-word occurrence of `variable`
    in a mutation is replaced by the expression accumulated so far, so a mutation
    that does not mention `variable` simply overrides what came before.
    Returns `variable` itself when there is nothing to combine.
    The `mutations` list is only read, never modified.

    NOTE(review): the accumulated expression is inserted without parentheses, so
    combining 'a + 1' with 'a * 2' yields 'a + 1 * 2'; callers that need
    precedence-safe output must add parentheses themselves.
    """
    variable_ref = re.compile(r'\b' + re.escape(variable) + r'\b')
    combined = ""
    for mutation in mutations:
        if combined.strip():
            # A callable replacement inserts the text literally; a plain string
            # would be treated as a template and have its backslashes interpreted.
            combined = variable_ref.sub(lambda _match, previous=combined: previous, mutation)
        else:
            combined = mutation
    return combined if combined.strip() else variable

In [16]:
# The last mutation does not mention `a`, so it replaces everything before it.
test_modifications = ['a', 'a + 2', 'b * 3']
print(combine_modifications(test_modifications, 'a'))
# Here the last mutation mentions `a`, so the earlier result is substituted into it.
test_modifications = ['a', 'a + 2', 'b * 3 + a']
print(combine_modifications(test_modifications, 'a'))

b * 3
b * 3 + a + 2


In [17]:
def make_list_of_mutations(root: IfNode, variable: str, decision_tree: list[bool]): 
    """
    Lists, in pre-order, the modifications of `variable` reached under `decision_tree`.

    For a condition node with decisions left, the first decision says whether the
    node's children are visited; the remaining decisions are passed to each child.
    Any other node contributes its own recorded modification of `variable`, if any.
    """
    if not root: 
        return []
    if not (root.condition and decision_tree):
        # Plain node, or a condition node with no decisions left.
        return [root.variables[variable]] if variable in root.variables else []
    if not decision_tree[0]:
        # Branch not taken: nothing below this node applies.
        return []
    remaining = decision_tree[1:]
    collected = []
    for child in root.children:
        collected.extend(make_list_of_mutations(child, variable, remaining))
    return collected

In [20]:
# Rebuild the if tree from source.c and annotate it for every variable.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
variables = extract_vars_from_iftree(tree)
for variable in variables:
    gen_var_tree(variable, tree)
# Raw list of modifications of `a` for one decision sequence ...
mutations = make_list_of_mutations(tree, 'a', [True, True, False])
print(mutations)
# ... and the combined expression for another decision sequence.
mutations = make_list_of_mutations(tree, 'a', [True, True, True])
print(combine_modifications(mutations, 'a'))

['10', 'a + 1']
10 + 1 + 14


In [22]:
def print_mutation_tree(node: IfNode, index: int, decision_tree: list[bool], root: IfNode, parent: IfNode=None) -> None:
    """
    Walks the if tree and prints it as nested `if ... begin / end else begin / end` text.

    node: node being visited; index: indentation level for its output.
    decision_tree: decisions taken on the way down to `node`.
    root: top of the whole tree, handed to the print helpers.
    parent: node whose children are being iterated; read for every non-root node.
    """
    if node.root: 
        # The root prints nothing itself; its children are visited as "taken".
        for child in node.children:
            print_mutation_tree(child, index, decision_tree + [True], root, node)
        return

    only_statement_of_branch = not node.children and parent.condition and len(parent.children) == 1
    if only_statement_of_branch: 
        print_mutation(node, index, decision_tree, root)
        return

    if not node.condition:
        return

    print_mutation_if(node, index, decision_tree, root)
    # Children are visited twice: once with the branch taken, once with it not taken.
    for branch_taken in (True, False):
        if not branch_taken:
            print_mutation_else(node, index)
        for child in node.children:
            print_mutation_tree(child, index + 1, decision_tree + [branch_taken], root, node)
    print_mutation_end(node, index)
    
def print_mutation_if(node: IfNode, index: int, decision_tree: list[bool], root: IfNode) -> None:
    """
    Prints the `if ( ... ) begin` line for a condition node.

    Every variable of the tree that appears in the node's condition is replaced
    by its combined modification under `decision_tree` before printing.
    """
    rewritten = "" + node.data
    for name in extract_vars_from_iftree(root):
        name_ref = r'\b' + name + r'\b'
        # Membership is tested against the original condition text, while the
        # substitution is applied to the progressively rewritten one.
        if not re.search(name_ref, node.data):
            continue
        history = make_list_of_mutations(root, name, decision_tree)
        rewritten = re.sub(name_ref, combine_modifications(history, name), rewritten)
    indentation = '  ' * (index)
    print(indentation + 'if (', rewritten, ') begin')

def print_mutation(node: IfNode, index: int, decision_tree: list[bool], root: IfNode) -> None:
    """
    Prints one `variable <= expression` line per variable that changes under `decision_tree`.

    Each variable's combined modification has every other variable it mentions
    replaced by that variable's own combined modification. Variables whose
    expression is just their own name are not printed.
    """
    indentation = '  ' * (index)
    names = extract_vars_from_iftree(root)
    for target in names:
        expression = combine_modifications(make_list_of_mutations(root, target, decision_tree), target)
        for other in names:
            if other == target:
                continue
            other_ref = r'\b' + other + r'\b'
            if re.search(other_ref, expression):
                other_history = make_list_of_mutations(root, other, decision_tree)
                expression = re.sub(other_ref, combine_modifications(other_history, other), expression)
        if expression != target:
            print(indentation + target, '<=', expression)

def print_mutation_else(node: IfNode, index: int) -> None:
    """Prints the `end else begin` separator at indentation level `index`; `node` is not used."""
    indentation = '  ' * index
    print(indentation + 'end else begin')

def print_mutation_end(node: IfNode, index: int) -> None:
    """Prints the closing `end` line at indentation level `index`; `node` is not used."""
    indentation = '  ' * index
    print(indentation + 'end')

# Rebuild the if tree from source.c and annotate it for every variable.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
variables = extract_vars_from_iftree(tree)
for variable in variables:
    gen_var_tree(variable, tree)
# Print the branch-by-branch mutation listing, starting at indentation level 0
# with no decisions taken yet.
print_mutation_tree(tree, 0, [], tree)

if ( 10 < b ) begin
  if ( 10 + 1 > 12 ) begin
    a <= 10 + 1 + 14
    b <= 12
    c <= 12 + 11 + 5
  end else begin
    a <= 10 + 1
    b <= 12
    c <= 12 + 11 + 5
  end
end else begin
  if ( 10 > b ) begin
    a <= 10
    c <= c + 5
  end else begin
    a <= 10
    c <= c + 5
  end
end
