In [1]:
import re

def _find_matching(text: str, start: int, opener: str, closer: str) -> int:
    """
    Returns the index of the `closer` that balances an `opener` located just before `start`.
    Raises ValueError if `text` ends before the delimiters balance.
    """
    depth = 1
    for offset, char in enumerate(text[start:]):
        if char == opener:
            depth += 1
        elif char == closer:
            depth -= 1
            if depth == 0:
                return start + offset
    raise ValueError(f"unbalanced '{opener}' while extracting if block")


def extract_if_block(source: str) -> "tuple[str, str | None, str | None, str | None]":
    """
    Extracts the first if block from the source code.
    Returns a tuple of the code before the block, the condition, the content of the block, and the code after the block.

    If the source contains no `if (` statement, the source is returned unchanged as the
    first element and the other three elements are None.

    Every returned fragment except the condition is stripped and has the leading
    whitespace of each line (and blank lines) removed; the condition is only stripped.

    Raises ValueError if the condition's parentheses or the block's braces are unbalanced,
    or if no '{' follows the condition.

    Limitations: delimiters inside string/char literals or comments are counted like any
    other, and brace-less `if` bodies are not supported.
    """

    indent_regex = re.compile(r'^\s+', flags=re.MULTILINE)

    def _dedent(fragment: str) -> str:
        # '^\s+' in MULTILINE mode eats indentation and whole blank lines.
        return indent_regex.sub('', fragment.strip())

    # Match `if` as a keyword followed by '(' so identifiers that merely contain
    # "if" (e.g. `diff`) are not mistaken for the start of a block.
    if_match = re.search(r'\bif\s*\(', source)
    if if_match is None:
        return source, None, None, None

    code_before_block = _dedent(source[:if_match.start()])

    # Balance parentheses so conditions such as `f(a) > g(b)` are captured whole.
    condition_start = if_match.end()
    condition_end = _find_matching(source, condition_start, '(', ')')
    condition = source[condition_start:condition_end].strip()

    # search for the end } by matching the number of { and } in the content
    brace_index = source.find('{', condition_end)
    if brace_index == -1:
        raise ValueError("if block has no opening '{'")
    content_start = brace_index + 1
    content_end = _find_matching(source, content_start, '{', '}')

    content = _dedent(source[content_start:content_end])
    all_code_after_block = _dedent(source[content_end + 1:])
    return code_before_block, condition, content, all_code_after_block

In [2]:
# Demo: split source.c around its first `if` block and print the four pieces
# returned by extract_if_block, each under its own banner.
with open('source.c', 'r') as f:
    source = f.read()
code_before_block, condition, content, all_code_after_block = extract_if_block(source)
print("================================ before: ")
print(code_before_block)
print("================================ condition: ")
print(condition)
print("================================ content: ")
print(content)
print("================================ after: ")
print(all_code_after_block)

a = 10;
a < b
a = a + 1; 
b = 12; 
if (a > b) {
a = a + 14; 
}
c = 11;
c = 123;


In [3]:
class IfNode: 
    """
    Represents a node in the if tree.

    Attributes:
        data: the text carried by the node (a run of code, or the condition text
            when `condition` is True).
        children: child nodes, in source order. The list passed in is stored as-is,
            not copied.
        condition: True when the node stands for an `if (...) { ... }` block whose
            condition is `data`; False for plain code.
        variables: per-node bookkeeping, created empty for every node.
    """
    def __init__(self, data: str, children: "list[IfNode]", condition: bool=False):
        self.data: str = data
        self.children: list[IfNode] = children
        self.condition: bool = condition
        # `dict[str, str]` (comma) is the mapping type; `dict[str: str]` is a slice expression.
        self.variables: dict[str, str] = {} # {variable: modification}

In [4]:
def build_iftree_helper(parent: IfNode, content: str) -> IfNode: 
    """
    Builds the if tree recursively.

    Splits `content` around its first if block (via extract_if_block) and appends to
    `parent.children`, in order:
      * a plain node holding the code before the block, if there is any;
      * a condition node for the block, whose own children are built from the
        block's body;
      * whatever the code after the block produces (handled by recursing with the
        same `parent`).

    Always returns `parent`, including when `content` is empty or blank, so callers
    get a usable node rather than None.
    """
    if not content or not content.strip():
        return parent
    # Keep the parameter intact: the block body gets its own name.
    code_before_block, condition, block_content, all_code_after_block = extract_if_block(content)
    if code_before_block:
        parent.children.append(IfNode(code_before_block, []))
    # extract_if_block signals "no if block" with None; an empty condition string
    # still denotes a block, so it must not be dropped together with the code after it.
    if condition is not None:
        wNode = IfNode(condition, [], True)
        parent.children.append(wNode)
        build_iftree_helper(wNode, block_content)
        build_iftree_helper(parent, all_code_after_block)
    return parent

def build_iftree(content: str) -> IfNode: 
    """
    Parses source code into an if tree.

    A fresh root node labelled '__root__' is created and handed to
    build_iftree_helper, whose result is returned as-is.
    """
    return build_iftree_helper(IfNode('__root__', []), content)

In [33]:
def print_iftree(node: IfNode, indent: int=-1, childNum: list=None) -> None:
    """
    Prints the if tree, with indentation and child numbers for easier selection. 

    Args:
        node: the node to print; its children are printed recursively.
        indent: nesting level used for the two-space indentation. The default of -1
            puts the root and its direct children at the same (zero) indentation,
            because multiplying a string by a negative number yields ''.
        childNum: the path of child indices from the root to `node`; printed after
            'selection:'. Defaults to an empty path.

    Condition nodes are printed as `if (<data>) {` and closed with `}` after their
    children; other nodes print one line per line of `data`.
    """
    # Avoid a shared mutable default; None means "start at the root".
    if childNum is None:
        childNum = []
    padding = '  ' * indent
    if node.condition:
        print(padding + 'if (' + node.data + ') {', '->', 'selection:', childNum, 'variables:', node.variables)
    else: 
        for line in node.data.splitlines():
            print(padding + line, '->', 'selection:', childNum, 'variables:', node.variables)
    for idx, child in enumerate(node.children):
        # Build a new path list per child so sibling paths never alias.
        print_iftree(child, indent+1, childNum + [idx]) 
    if node.condition:
        print(padding + '}')

In [34]:
# Demo: parse source.c into an if tree and print it with selection paths.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
print_iftree(tree)

__root__ -> selection: [] variables: []
a = 10; -> selection: [0] variables: []
if (a < b) { -> selection: [1] variables: []
  a = a + 1;  -> selection: [1, 0] variables: []
  b = 12; -> selection: [1, 0] variables: []
  if (a > b) { -> selection: [1, 1] variables: []
    a = a + 14; -> selection: [1, 1, 0] variables: []
  }
  c = 11; -> selection: [1, 2] variables: []
}
c = 123; -> selection: [2] variables: []


In [35]:
import re
def extract_vars(code: str) -> list[str]:
    """
    Returns the sorted, de-duplicated identifiers found in a piece of code.

    Only word-bounded tokens that begin with an ASCII letter are considered, so
    names starting with an underscore or a digit are never reported. Tokens that
    appear in the exclusion set below (C keywords and a few common macros/types)
    are dropped.
    """
    excluded = {
        'int', 'float', 'double', 'char', 'long', 'short', 'signed', 'unsigned',
        'void', 'bool', 'true', 'false', 'if', 'else', 'while', 'for', 'do',
        'switch', 'case', 'break', 'continue', 'return', 'goto', 'sizeof',
        'typedef', 'struct', 'union', 'enum', 'static', 'extern', 'auto',
        'register', 'const', 'volatile', 'inline', 'restrict', 'default', 'asm',
        'alignas', 'alignof', 'atomic', 'noreturn', 'thread_local', 'complex',
        'imaginary', 'NULL', 'offsetof', 'static_assert', 'wchar_t',
    }
    identifiers = {match.group(0) for match in re.finditer(r'\b[a-zA-Z]\w*\b', code)}
    # Set difference removes the excluded words; sorted() gives the final order.
    return sorted(identifiers - excluded)

def extract_vars_from_iftree(node: IfNode) -> list[str]:
    """
    Collects every variable named anywhere in the subtree rooted at `node`.

    Each line of the node's own `data` is scanned with extract_vars, then each
    child subtree is visited recursively. The result is sorted and free of duplicates.
    """
    found = set()
    for line in node.data.splitlines():
        found.update(extract_vars(line))
    for child in node.children:
        found.update(extract_vars_from_iftree(child))
    return sorted(found)

In [36]:
# Demo: extract variable names from a literal snippet and from the whole
# if tree built from source.c.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
test_string = 'int b = 1; int a = 2; int b = 3;'
print(extract_vars(test_string))
print(extract_vars_from_iftree(tree))

['a', 'b']
['a', 'b', 'c']


In [37]:
def extract_modifications(variable: str, code: str) -> list[str]:
    """
    Extracts all modifications of a variable in a piece of code.

    A modification is a plain assignment `<variable> = <expr>`; the returned list
    holds each `<expr>` (everything up to, but excluding, the next ';'), in order
    of appearance. Equality tests (`<variable> == ...`) are not assignments and are
    ignored, as are compound assignments such as `+=`.

    Args:
        variable: the name to look for; it is matched literally as a whole word.
        code: the code to scan (may span several statements/lines).
    """
    # re.escape keeps names such as `p.x` literal; `=(?!=)` rejects the `==` operator.
    assignment_prefix = r'\b' + re.escape(variable) + r'\b\s*=(?!=)\s*'
    # Extract modifications of the variable
    modifications = re.findall(assignment_prefix + r'[^;]+', code)
    # keep only the right side of the assignment
    return [re.sub(assignment_prefix, '', modification) for modification in modifications]

In [38]:
# Demo: list the assignments to `a` in the first top-level node and in the
# first child of the second top-level node of the tree built from source.c.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
print(extract_modifications('a', tree.children[0].data))
print(extract_modifications('a', tree.children[1].children[0].data))

['10']
['a + 1']


In [82]:
def gen_var_tree_helper(variable: str, root: IfNode, parent: IfNode, depth: int) -> list: 
    """
    Helper function for gen_var_tree.

    Updates `root.variables` for a single node and returns it. Only nodes that are
    not condition nodes and whose `data` contains an assignment to `variable`
    (per extract_modifications) are touched; for any other node `root.variables`
    is returned unchanged. A falsy `root` yields an empty list.

    NOTE(review): `root.variables` is used as a list here (len/append/indexing),
    while IfNode.__init__ as shown in this notebook creates it as a dict — confirm
    which definition of IfNode this cell is meant to run against.
    NOTE(review): `Mutation` is not defined in the visible cells; from its use here
    it presumably takes (variables, condition) and exposes `.variables` and
    `.condition` — confirm.
    NOTE(review): the print calls below look like leftover debugging output.
    """
    if not root:
        return []
    if extract_modifications(variable, root.data) and not root.condition: 
        if len(root.variables) == 0: 
            # First record for this node: store only the first assignment found.
            print("HERE")
            root.variables.append(Mutation({variable: [extract_modifications(variable, root.data)[0]]}, None))
        else:        
            print(root.variables, depth)
            # Grow the list until index `depth` exists; each new entry is built from
            # the `.variables` of the entry at depth-1 and an empty condition.
            # NOTE(review): the same `.variables` object is passed on, not a copy — confirm intended.
            while len(root.variables) <= depth:
                print(root.variables, depth)
                root.variables.append(Mutation(root.variables[depth-1].variables, ''))
            # An empty condition is filled in from the parent's data; a different,
            # already-set condition is only reported, not overwritten.
            if root.variables[depth].condition == '': 
                root.variables[depth].condition = parent.data
            elif root.variables[depth].condition != parent.data:
                print("gen_var_tree_helper: condition mismatch:", root.variables[depth].condition, parent.data)
            # Start a new history at depth 0 (or when the previous depth has none for
            # this variable); otherwise extend the previous depth's history.
            if depth == 0 or variable not in root.variables[depth-1].variables: 
                root.variables[depth].variables[variable] = [extract_modifications(variable, root.data)[0]]     
            else:      
                root.variables[depth].variables[variable] = root.variables[depth-1].variables[variable] + [extract_modifications(variable, root.data)[0]]
            print("depth:", depth, "mutation:", root.variables[depth].variables[variable])
    return root.variables

def gen_var_tree(variable: str, node: IfNode, mutations: list[Mutation], parent: IfNode=None, depth: int=0) -> list[Mutation]: 
    """
    Updates/mutates the variable property of the node and its children using pre-order traversal.

    Args:
        variable: the variable whose assignments are being tracked.
        node: the subtree root to process; a falsy node yields an empty list.
        mutations: accepted for interface compatibility; it is replaced by the
            helper's result for `node` before it is ever read.
        parent: the node whose child `node` is (None for the tree root).
        depth: nesting depth of `node`, 0 for the tree root.

    Returns:
        Whatever the last visited node's gen_var_tree_helper call returned (the
        node's own result when it has no children).
    """
    if not node: 
        return [] 
    # gen_var_tree_helper takes (variable, root, parent, depth); passing `mutations`
    # as an extra positional argument raised TypeError.
    mutations = gen_var_tree_helper(variable, node, parent, depth)
    for child in node.children:
        mutations = gen_var_tree(variable, child, mutations, node, depth+1)
    return mutations

In [83]:
# Demo: annotate the tree built from source.c with the assignments to `a`
# (gen_var_tree updates the nodes' `variables`), then print the tree.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
gen_var_tree('a', tree, [])
print_iftree(tree)

HERE
HERE
HERE
__root__ -> selection: [] variables: []
a = 10; -> selection: [0] variables: [[]]
if (a < b) { -> selection: [1] variables: []
  a = a + 1;  -> selection: [1, 0] variables: [[]]
  b = 12; -> selection: [1, 0] variables: [[]]
  if (a > b) { -> selection: [1, 1] variables: []
    a = a + 14; -> selection: [1, 1, 0] variables: [[]]
  }
  c = 11; -> selection: [1, 2] variables: []
}
c = 123; -> selection: [2] variables: []


In [None]:
def get_combined_mutations(mutations: list[Mutation], variable: str) -> str: 
    """
    Combines all mutations of a variable into a single string.

    The mutations are folded left to right into one expression:
      * the first mutation's `modification` becomes the running expression;
      * if the running expression or the next modification mentions `variable`,
        every whole-word occurrence of `variable` in the next modification is
        replaced by the running expression;
      * otherwise the next modification is appended with ' + '.
    If nothing was combined (empty input or blank result) the variable name itself
    is returned.

    The input list is only read; it is not consumed.

    NOTE(review): substitution is purely textual — no parentheses are inserted
    around the substituted expression.
    """
    # Escape the name so it is matched literally as a whole word.
    variable_regex = re.compile(r'\b' + re.escape(variable) + r'\b')
    prev_mutated = ""
    for mutation in mutations:
        modification = mutation.modification
        if not prev_mutated.strip():
            # Nothing accumulated yet: start from this modification.
            prev_mutated = modification
        elif variable_regex.search(prev_mutated) or variable_regex.search(modification):
            # A callable replacement inserts the text verbatim (no backslash
            # or group-reference processing).
            combined = prev_mutated
            prev_mutated = variable_regex.sub(lambda _match: combined, modification)
        else:
            prev_mutated = prev_mutated + ' + ' + modification
    if not prev_mutated.strip():
        prev_mutated = variable
    return prev_mutated

def print_mutation_if(mutations: list[Mutation], index: int, variable: str, decision_tree: list[bool]) -> None: 
    """
    Prints the if statement for a mutation.

    The condition of `mutations[index]` is printed as `if ( <cond> ) begin`,
    indented by `index` levels. Before printing, every whole-word occurrence of
    `variable` in the condition is replaced by the combined expression of the
    earlier mutations whose entry in `decision_tree` is True.

    The stored `mutation.condition` is left untouched: this function is called for
    the same mutation on several decision paths, and writing the substituted text
    back would make later calls substitute into already-substituted text.
    """
    mutation = mutations[index]
    # zip stops at the shorter sequence, so only mutations with a recorded decision are considered.
    taken = [candidate for candidate, decision in zip(mutations, decision_tree) if decision]
    prev_mutated = get_combined_mutations(taken, variable)
    # Callable replacement inserts the expression verbatim; the name is matched literally.
    condition = re.sub(r'\b' + re.escape(variable) + r'\b', lambda _match: prev_mutated, mutation.condition)
    print('  ' * (index) + 'if (', condition, ') begin')
    return

def print_mutation_else(mutations: list[Mutation], index: int) -> None: 
    """
    Emits the `end else begin` line, indented by `index` levels.

    `mutations` is accepted but not used.
    """
    padding = '  ' * index
    print(padding + 'end else begin')

def print_mutation_end(mutations: list[Mutation], index: int) -> None: 
    """
    Emits the closing `end` line, indented by `index` levels.

    `mutations` is accepted but not used.
    """
    padding = '  ' * index
    print(padding + 'end')

def print_mutation(mutations: list[Mutation], index: int, variable: str, decision_tree: list[bool]) -> None: 
    """
    Emits the final assignment line `<variable> <= <expression>`.

    The expression is the combination (via get_combined_mutations) of those
    mutations whose matching entry in `decision_tree` is True. The line is
    indented by `index` levels.
    """
    selected = []
    for candidate, decision in zip(mutations, decision_tree):
        if decision:
            selected.append(candidate)
    expression = get_combined_mutations(selected, variable)
    padding = '  ' * index
    print(padding + variable, '<=', expression)

def print_mutation_tree(mutations: list[Mutation], index: int, variable: str, decision_tree: list[bool]) -> None: 
    """
    Prints the mutation tree.

    Walks the mutations from `index` onward. For every conditional mutation an
    `if ( ... ) begin` / `end else begin` / `end` group is printed, and both outcomes
    are explored by extending `decision_tree` with True and False respectively.
    Once every mutation has a decision (`index == len(mutations)`), the resulting
    assignment is printed.

    A mutation without a condition (None or blank) is treated as always applied:
    no if/else is printed for it and the walk continues with a True decision.
    Previously such a mutation ended the walk without printing anything below it.
    """
    if index == len(mutations):
        print_mutation(mutations, index, variable, decision_tree)
        return
    mutation = mutations[index]
    # `or ''` guards against a condition of None.
    if not (mutation.condition or '').strip():
        print_mutation_tree(mutations, index+1, variable, decision_tree + [True])
        return
    print_mutation_if(mutations, index, variable, decision_tree)
    print_mutation_tree(mutations, index+1, variable, decision_tree + [True])
    print_mutation_else(mutations, index)
    print_mutation_tree(mutations, index+1, variable, decision_tree + [False])
    print_mutation_end(mutations, index)

In [None]:
# Demo for variable `a`: annotate the tree, print it, then print the mutation
# tree recorded on the node at selection path [1, 1, 0].
# NOTE(review): this indexes `.variables` by variable name (dict-style) — confirm
# it matches the shape gen_var_tree actually produces.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
variable = 'a'
gen_var_tree(variable, tree, [])
print_iftree(tree)
mutations = tree.children[1].children[1].children[0].variables[variable]
print(mutations)
print_mutation_tree(mutations, 0, variable, [])

gen_var_tree_helper: condition mismatch


AttributeError: 'Mutation' object has no attribute 'modifications'

In [None]:
# Demo for variable `b`: annotate the tree, print it, then print the mutation
# tree recorded on the last top-level node (selection path [2]).
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
variable = 'b'
gen_var_tree(variable, tree, [])
print_iftree(tree)
modifications = tree.children[2].variables[variable]
print(modifications)
print_mutation_tree(modifications, 0, variable, [])

__root__ -> [] {'b': []}
a = 10; -> [0] {'b': []}
if (a < b) { -> [1] {'b': []}
  a = a + 1;  -> [1, 0] {'b': [12 if a < b]}
  b = 12; -> [1, 0] {'b': [12 if a < b]}
  if (a > b) { -> [1, 1] {'b': [12 if a < b]}
    a = a + 14; -> [1, 1, 0] {'b': [12 if a < b]}
  }
  c = 11; -> [1, 2] {'b': [12 if a < b]}
}
c = 123; -> [2] {'b': [12 if a < b]}
[12 if a < b]
if ( a < b ) begin
  b <= 12
end else begin
  b <= b
end


In [None]:
# Demo for variable `c`: annotate the tree, print it, then print the mutation
# tree recorded on the last top-level node (selection path [2]).
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
variable = 'c'
gen_var_tree(variable, tree, [])
print_iftree(tree)
modifications = tree.children[2].variables[variable]
print(modifications)
print_mutation_tree(modifications, 0, variable, [])

__root__ -> [] {'c': []}
a = 10; -> [0] {'c': []}
if (a < b) { -> [1] {'c': []}
  a = a + 1;  -> [1, 0] {'c': []}
  b = 12; -> [1, 0] {'c': []}
  if (a > b) { -> [1, 1] {'c': []}
    a = a + 14; -> [1, 1, 0] {'c': []}
  }
  c = 11; -> [1, 2] {'c': [11 if a < b]}
}
c = 123; -> [2] {'c': [11 if a < b, 123]}
[11 if a < b, 123]
if ( a < b ) begin
  if ( __root__ ) begin
    c <= 11 + 123
  end else begin
    c <= 11
  end
end else begin
  if ( __root__ ) begin
    c <= 123
  end else begin
    c <= c
  end
end


In [None]:
# Demo: annotate a single tree for `a`, `b` and `c` in turn, then print it once.
with open('source.c', 'r') as f:
    source = f.read()
tree = build_iftree(source)
gen_var_tree('a', tree, [])
gen_var_tree('b', tree, [])
gen_var_tree('c', tree, [])
print_iftree(tree)

__root__ -> [] {'a': [], 'b': [], 'c': []}
a = 10; -> [0] {'a': [10], 'b': [], 'c': []}
if (a < b) { -> [1] {'a': [10], 'b': [], 'c': []}
  a = a + 1;  -> [1, 0] {'a': [10, a + 1 if a < b], 'b': [12 if a < b], 'c': []}
  b = 12; -> [1, 0] {'a': [10, a + 1 if a < b], 'b': [12 if a < b], 'c': []}
  if (a > b) { -> [1, 1] {'a': [10, a + 1 if a < b], 'b': [12 if a < b], 'c': []}
    a = a + 14; -> [1, 1, 0] {'a': [10, a + 1 if a < b, a + 14 if a > b], 'b': [12 if a < b], 'c': []}
  }
  c = 11; -> [1, 2] {'a': [10, a + 1 if a < b, a + 14 if a > b], 'b': [12 if a < b], 'c': [11 if a < b]}
}
c = 123; -> [2] {'a': [10, a + 1 if a < b, a + 14 if a > b], 'b': [12 if a < b], 'c': [11 if a < b, 123]}
