In [16]:
import re

def extract_if_block(source: str) -> tuple((str, str, str, str)):
    indent_regex = re.compile(r'^\s+', flags=re.MULTILINE)
    try: 
        code_before_block = re.sub(indent_regex, '', source[:source.index('if')].strip())
    except ValueError:
        return source, None, None, None

    condition_start = re.search('if\s*\(', source).end()
    condition_end = source.index(')', condition_start)
    condition = source[condition_start:condition_end].strip()
    
    # search for the end } by matching the number of { and } in the content
    content_start = source.index('{', condition_end) + 1
    brace_count = 1
    for i, c in enumerate(source[content_start:]):
        if c == '{':
            brace_count += 1
        elif c == '}':
            brace_count -= 1
            if brace_count == 0:
                content_end = content_start + i
                break

    content = re.sub(indent_regex, '', source[content_start:content_end].strip())
    all_code_after_block = re.sub(indent_regex, '', source[content_end+1:].strip())
    return code_before_block, condition, content, all_code_after_block

In [17]:
with open('source.c', 'r') as f:
    source = f.read()
code_before_block, condition, content, all_code_after_block = extract_if_block(source)
print("================================ before: ")
print(code_before_block)
print("================================ condition: ")
print(condition)
print("================================ content: ")
print(content)
print("================================ after: ")
print(all_code_after_block)

a = 10;
a < b
a = a + 1; 
b = 12; 
if (a > b) {
a = a + 14; 
}
c = 11;
c = 123;


In [18]:
class IfNode: 
    def __init__(self, data: str, children: list, condition: bool=False):
        self.data: str = data
        self.children: list[IfNode] = children
        self.condition: bool = condition
        self.variables: dict[str, list[Mutation]] = {}

In [19]:
def build_tree_helper(parent: IfNode, content: str) -> IfNode:  
    if not content: 
        return
    code_before_block, condition, content, all_code_after_block = extract_if_block(content)
    if code_before_block:
        parent.children.append(IfNode(code_before_block, []))
    if condition: 
        wNode = IfNode(condition, [], True)
        parent.children.append(wNode)
        build_tree_helper(wNode, content)
        build_tree_helper(parent, all_code_after_block)
    return parent

def build_tree(content: str) -> IfNode: 
    wNode = IfNode('__root__', [])
    return build_tree_helper(wNode, content)

In [20]:
def print_tree(node: IfNode, indent=-1, childNum=[]) -> None:
    if node.condition:
        print('  '*indent + 'if (' + node.data + ') {', '->', childNum, node.variables)
    else: 
        for line in node.data.splitlines():
            print('  '*indent + line, '->', childNum, node.variables)
    for idx, child in enumerate(node.children):
        print_tree(child, indent+1, childNum + [idx]) 
    if node.condition:
        print('  '*indent + '}')

In [21]:
with open('source.c', 'r') as f:
    source = f.read()
tree = build_tree(source)
print_tree(tree)

__root__ -> [] {}
a = 10; -> [0] {}
if (a < b) { -> [1] {}
  a = a + 1;  -> [1, 0] {}
  b = 12; -> [1, 0] {}
  if (a > b) { -> [1, 1] {}
    a = a + 14; -> [1, 1, 0] {}
  }
  c = 11; -> [1, 2] {}
}
c = 123; -> [2] {}


In [22]:
import re

def extract_variables(code):
    # Extract variables that start with a letter
    variables = re.findall(r'\b[a-zA-Z]\w*\b', code)
    # remove keywords in C, such as int, float, double, etc.
    variables = [var for var in variables if var not in ['int', 'float', 'double', 'char', 'long', 'short', 'unsigned', 'signed', 'void', 'struct', 'union', 'enum', 'typedef', 'const', 'volatile', 'auto', 'register', 'static', 'extern', 'inline', 'restrict', 'bool', 'complex', 'imaginary', 'break', 'case', 'continue', 'default', 'do', 'else', 'for', 'goto', 'if', 'return', 'sizeof', 'switch', 'while', 'alignas', 'alignof', 'atomic', 'noreturn', 'static_assert', 'thread_local', 'true', 'false', 'NULL']]
    return variables

def extract_variables_from_tree(node):
    variables = []
    for line in node.data.splitlines():
        variables += extract_variables(line)
    for child in node.children:
        variables += extract_variables_from_tree(child)
    # remove duplicates and sort
    variables = list(dict.fromkeys(variables))
    variables.sort()
    return variables

In [23]:
with open('source.c', 'r') as f:
    source = f.read()
tree = build_tree(source)
extract_variables_from_tree(tree.children[1].children[0])

['a', 'b']

In [24]:
def extract_modifications(variable, code): 
    # Extract modifications of the variable
    modifications = re.findall(r'\b' + variable + r'\b\s*=\s*[^;]+', code)
    # keep only the right side of the assignment
    modifications = [re.sub(r'\b' + variable + r'\b\s*=\s*', '', modification) for modification in modifications]
    return modifications

In [25]:
with open('source.c', 'r') as f:
    source = f.read()
tree = build_tree(source)
print(extract_modifications('a', tree.children[0].data))
print(extract_modifications('a', tree.children[1].children[0].data))

['10']
['a + 1']


In [26]:
class Mutation: 
    def __init__(self, modification, condition):
        self.modification = modification
        self.condition = condition
    
    def __repr__(self):
        if self.condition == None or not self.condition.strip():
            return self.modification
        if self.condition == '__root__':
            return self.modification
        return self.modification + ' if ' + self.condition

# def generate_variable_tree_dep(variable, root, modifications): 
#     if root == None or root.children == []:
#         return None
#     for child in root.children: 
#         if variable in extract_variables(child.data): 
#             if extract_modifications(variable, child.data): 
#                 modifications.append(Mutation(extract_modifications(variable, child.data)[0], root.data)) # TODO: fix the fact that extract_modifications returns a list
#         if variable in extract_variables_from_tree(child):
#             generate_variable_tree_dep(variable, child, modifications) 
#             local_modifications = [Mutation(modification.modification, modification.condition) for modification in modifications]
#             # check if variable is in the first modification using regex
#             if local_modifications and local_modifications[0].modification.strip() and re.search(r'\b' + variable + r'\b', local_modifications[0].modification):
#                 # append the variable itself as the first modification
#                 local_modifications.insert(0, Mutation(variable, root.data))
#             child.variables[variable] = local_modifications
#     return root

def work(variable: str, root: IfNode, mutations: list[Mutation], parent: IfNode) -> list[Mutation]: 
    if not root:
        return []
    mutations = [Mutation(modification.modification, modification.condition) for modification in mutations]
    if extract_modifications(variable, root.data) and not root.condition: 
        mutations.append(Mutation(extract_modifications(variable, root.data)[0], parent.data))
    root.variables[variable] = mutations
    return mutations

def generate_variable_tree(variable: str, node: IfNode, mutations: list[Mutation], parent: IfNode=None) -> list[Mutation]: 
    if not node: 
        return [] 
    mutations = work(variable, node, mutations, parent)
    for child in node.children:
        mutations = generate_variable_tree(variable, child, mutations, node)
    return mutations

In [27]:
with open('source.c', 'r') as f:
    source = f.read()
tree = build_tree(source)
generate_variable_tree('a', tree, [])
print_tree(tree)

__root__ -> [] {'a': []}
a = 10; -> [0] {'a': [10]}
if (a < b) { -> [1] {'a': [10]}
  a = a + 1;  -> [1, 0] {'a': [10, a + 1 if a < b]}
  b = 12; -> [1, 0] {'a': [10, a + 1 if a < b]}
  if (a > b) { -> [1, 1] {'a': [10, a + 1 if a < b]}
    a = a + 14; -> [1, 1, 0] {'a': [10, a + 1 if a < b, a + 14 if a > b]}
  }
  c = 11; -> [1, 2] {'a': [10, a + 1 if a < b, a + 14 if a > b]}
}
c = 123; -> [2] {'a': [10, a + 1 if a < b, a + 14 if a > b]}


In [28]:
def get_combined_mutations(variable: str, mutations: list[Mutation]) -> str: 
    prev_mutated = ""
    while mutations: 
        mutation = mutations.pop(0)
        if re.search(r'\b' + variable + r'\b', prev_mutated):
            # if the variable is in the modification, replace it with the combined modification
            prev_mutated = re.sub(r'\b' + variable + r'\b', prev_mutated, mutation.modification)
        else: 
            # if the variable is not in the modification, append it to the combined modification
            if prev_mutated.strip():
                # check if variable is inside the modification
                if re.search(r'\b' + variable + r'\b', mutation.modification):
                    prev_mutated = re.sub(r'\b' + variable + r'\b', prev_mutated, mutation.modification)
                else: 
                    prev_mutated = prev_mutated + ' + ' + mutation.modification
            else: 
                prev_mutated = mutation.modification
    if not prev_mutated.strip():
        prev_mutated = variable
    return prev_mutated

def print_mutation_if(variable: str, mutations: list[Mutation], decision_tree: list[bool], index: int) -> None: 
    mutation = mutations[index]
    mutations = [mutation for mutation, decision in zip(mutations, decision_tree) if decision]
    prev_mutated = get_combined_mutations(variable, mutations)
    mutation.condition = re.sub(r'\b' + variable + r'\b', prev_mutated, mutation.condition)
    print('  ' * (index) + 'if (', mutation.condition, ') begin')
    return

def print_mutation_else(mutations: list[Mutation], index: int) -> None: 
    print('  ' * (index) + 'end else begin')
    return

def print_mutation_end(mutations: list[Mutation], index: int) -> None: 
    print('  ' * (index) + 'end')
    return

def print_mutation_no_condition(variable: str, mutations: list[Mutation], decision_tree: list[bool], index: int) -> None: 
    mutations = [mutation for mutation, decision in zip(mutations, decision_tree) if decision]
    prev_mutated = get_combined_mutations(variable, mutations)
    print('  ' * (index) + variable , '<=', prev_mutated)
    return

def print_mutation_tree(mutations: list[Mutation], variable: str, decision_tree: list[bool], index: int) -> None: 
    if index == len(mutations):
        print_mutation_no_condition(variable, mutations, decision_tree, index)
        return
    mutation = mutations[index]
    if mutation.condition.strip():
        print_mutation_if(variable, mutations, decision_tree, index)
        print_mutation_tree(mutations, variable, decision_tree + [True], index+1)
        print_mutation_else(mutations, index)
        print_mutation_tree(mutations, variable, decision_tree + [False], index+1)
        print_mutation_end(mutations, index)

In [29]:
# test for combine_modifications(variables, tree)
with open('source.c', 'r') as f:
    source = f.read()
tree = build_tree(source)
variable = 'a'
generate_variable_tree(variable, tree, [])
modifications = tree.children[1].children[1].children[0].variables[variable]
print(modifications)
print_mutation_tree(modifications, variable, [], 0)    

[10, a + 1 if a < b, a + 14 if a > b]
if ( __root__ ) begin
  if ( 10 < b ) begin
    if ( 10 + 1 > b ) begin
      a <= 10 + 1 + 14
    end else begin
      a <= 10 + 1
    end
  end else begin
    if ( 10 + 1 > b ) begin
      a <= 10 + 14
    end else begin
      a <= 10
    end
  end
end else begin
  if ( 10 < b ) begin
    if ( 10 + 1 > b ) begin
      a <= a + 1 + 14
    end else begin
      a <= a + 1
    end
  end else begin
    if ( 10 + 1 > b ) begin
      a <= a + 14
    end else begin
      a <= a
    end
  end
end


In [32]:
# test for combine_modifications(variables, tree)
with open('source.c', 'r') as f:
    source = f.read()
tree = build_tree(source)
variable = 'b'
generate_variable_tree(variable, tree, [])
print_tree(tree)
modifications = tree.children[2].variables[variable]
print(modifications)
print_mutation_tree(modifications, variable, [], 0)    

__root__ -> [] {'b': []}
a = 10; -> [0] {'b': []}
if (a < b) { -> [1] {'b': []}
  a = a + 1;  -> [1, 0] {'b': [12 if a < b]}
  b = 12; -> [1, 0] {'b': [12 if a < b]}
  if (a > b) { -> [1, 1] {'b': [12 if a < b]}
    a = a + 14; -> [1, 1, 0] {'b': [12 if a < b]}
  }
  c = 11; -> [1, 2] {'b': [12 if a < b]}
}
c = 123; -> [2] {'b': [12 if a < b]}
[12 if a < b]
if ( a < b ) begin
  b <= 12
end else begin
  b <= b
end
