In [154]:
class IfBlock:
    """Plain record bundling four values describing one `if` block.

    The constructor stores its arguments unchanged (no copying, no validation):

    condition          -- stored as ``self.condition``
    content            -- stored as ``self.content``
    code_before_if     -- stored as ``self.code_before_if``
    all_code_after_if  -- stored as ``self.all_code_after_if``

    NOTE(review): the field names suggest this mirrors the pieces produced by an
    `if`-block extractor (text before, condition, body, text after) -- confirm
    against the code that instantiates it.
    """

    def __init__(self, condition, content, code_before_if, all_code_after_if):
        # The `if` itself: its condition and its body.
        self.condition, self.content = condition, content
        # The code surrounding the `if`.
        self.code_before_if, self.all_code_after_if = code_before_if, all_code_after_if

In [167]:
import re

# Leading whitespace of every line. Because `\s` also matches newlines, a run of
# blank lines in front of a line is removed together with that line's indent.
_INDENT_REGEX = re.compile(r'^\s+', flags=re.MULTILINE)

# An `if` keyword (not the tail of an identifier such as `diff`) followed by `(`.
_IF_HEADER_REGEX = re.compile(r'\bif\s*\(')


def _strip_indent(text):
    """Trim `text` and drop the leading whitespace of each of its lines."""
    return _INDENT_REGEX.sub('', text.strip())


def _find_closing(text, start, opener, closer):
    """Return the index of the `closer` that balances an already-open `opener`.

    `start` is the index just past the opening delimiter. Nested pairs are
    skipped by counting. Raises ValueError when `text` ends before the
    delimiter is balanced.
    """
    depth = 1
    for idx in range(start, len(text)):
        char = text[idx]
        if char == opener:
            depth += 1
        elif char == closer:
            depth -= 1
            if depth == 0:
                return idx
    raise ValueError(
        "unbalanced %r: no matching %r found after index %d" % (opener, closer, start)
    )


def extract_if_block(source):
    """Split `source` around its first ``if (...) { ... }`` block.

    Args:
        source: text of brace-delimited, C-like code.

    Returns:
        A 4-tuple ``(code_before_block, condition, content, all_code_after_block)``.
        All four are trimmed; the three code pieces additionally have per-line
        indentation removed. When `source` contains no ``if (`` header the
        result is ``(source, None, None, None)`` with `source` returned
        unmodified.

    Raises:
        ValueError: if the condition's parentheses or the body's braces are
            unbalanced, or if no ``{`` follows the condition.

    Limitations: delimiters are counted character by character, so parentheses
    or braces inside string literals or comments are counted as well. A
    brace-less ``if`` body is not recognised -- the next ``{`` found after the
    condition is taken as the start of the body.
    """
    header = _IF_HEADER_REGEX.search(source)
    if header is None:
        return source, None, None, None

    # Everything in front of the `if` keyword itself.
    code_before_block = _strip_indent(source[:header.start()])

    # The condition runs up to the parenthesis that balances the header's `(`,
    # so nested calls such as `if (f(a) > b)` are kept whole.
    condition_start = header.end()
    condition_end = _find_closing(source, condition_start, '(', ')')
    condition = source[condition_start:condition_end].strip()

    # The body runs from just after the first `{` to its balancing `}`.
    content_start = source.index('{', condition_end) + 1
    content_end = _find_closing(source, content_start, '{', '}')

    content = _strip_indent(source[content_start:content_end])
    all_code_after_block = _strip_indent(source[content_end + 1:])
    return code_before_block, condition, content, all_code_after_block

In [168]:
# Read the sample C file and show the four pieces extract_if_block returns for it.
with open('source.c', 'r') as source_file:
    source = source_file.read()

code_before_block, condition, content, all_code_after_block = extract_if_block(source)

# Each banner is followed on the next line by the corresponding piece.
print("================================ before: ", code_before_block, sep='\n')
print("================================ condition: ", condition, sep='\n')
print("================================ content: ", content, sep='\n')
print("================================ after: ", all_code_after_block, sep='\n')

a = 10;
a > b
a = a + 1;
if (a > b) {
a = a + 14; 
}
c = 11;
c = 123;


In [169]:
class IfNode:
    """One node of the tree built from the parsed source.

    Attributes:
        data: payload stored as given (the tree printer in this notebook treats
            it as text).
        children: list of child nodes, stored by reference (not copied).
        condition: flag, False by default; the tree builder in this notebook
            passes True for nodes that represent an `if` header.
        variables: dict that starts out empty for every node.
            NOTE(review): nothing in the visible code fills it -- confirm its
            intended use.
    """

    def __init__(self, data, children, condition=False):
        self.data, self.children, self.condition = data, children, condition
        # A fresh dict per instance, never shared between nodes.
        self.variables = dict()

In [170]:
def build_tree_helper(parent, content):
    """Parse `content` and append the resulting nodes to ``parent.children``.

    The text is split with extract_if_block. Code before the first `if` becomes
    a plain IfNode; the `if` itself becomes a condition IfNode whose children
    are built recursively from the block body; the text after the block is then
    processed the same way and attached to `parent`.

    Args:
        parent: node whose ``children`` list is extended in place.
        content: source text to parse; empty or None means nothing to add.

    Returns:
        `parent`, in every case -- including empty `content` -- so callers
        always get a node back.
    """
    if not content:
        # Nothing to parse, but still return the node rather than None.
        return parent

    code_before_block, condition, block_body, all_code_after_block = extract_if_block(content)

    if code_before_block:
        parent.children.append(IfNode(code_before_block, []))

    # extract_if_block reports "no if found" with None. An empty condition
    # string still denotes a real block, so test identity rather than truthiness;
    # otherwise the block and everything after it would be dropped silently.
    if condition is not None:
        if_node = IfNode(condition, [], True)
        parent.children.append(if_node)
        build_tree_helper(if_node, block_body)
        build_tree_helper(parent, all_code_after_block)

    return parent

def build_tree(content):
    """Parse `content` into a tree hanging off a synthetic node labelled 'Root'.

    The root starts with no children; the result is whatever build_tree_helper
    returns for that root and `content`.
    """
    return build_tree_helper(IfNode('Root', []), content)

In [171]:
def print_tree(node, indent=-1, childNum=None):
    """Pretty-print `node` and its descendants, one source line per output line.

    Each printed line is followed by ``->`` and the path of child indices that
    leads from the starting node to the node the line belongs to. Condition
    nodes are rendered as ``if (<data>) {`` and closed with a ``}`` line after
    their children (the closing line carries no path).

    Args:
        node: object exposing ``data`` (str), ``children`` (iterable of nodes)
            and ``condition`` (truthy for `if` nodes).
        indent: nesting depth; each level adds two spaces. The default of -1
            makes the starting node and its direct children both print flush
            left, since a negative repeat count yields an empty prefix.
        childNum: list of child indices identifying `node`; defaults to an
            empty path.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if childNum is None:
        childNum = []

    pad = '  ' * indent

    if node.condition:
        print(f'{pad}if ({node.data}) {{', '->', childNum)
    else:
        for line in node.data.splitlines():
            print(pad + line, '->', childNum)

    for idx, child in enumerate(node.children):
        # A new list per child keeps sibling paths independent.
        print_tree(child, indent + 1, childNum + [idx])

    if node.condition:
        print(pad + '}')

In [172]:
# Smoke test: parse source.c with build_tree and print the resulting tree.
with open('source.c', 'r') as source_file:
    source = source_file.read()

tree = build_tree(source)

print_tree(tree)

a = 10;
a > b
a = a + 1;
if (a > b) {
a = a + 14; 
}
c = 11;
c = 123;
a = a + 1;
a > b
a = a + 14;
c = 11;
a = a + 14;
None
None
None
c = 11;
None
None
None
c = 123;
None
None
None
Root -> []
a = 10; -> [0]
if (a > b) { -> [1]
  a = a + 1; -> [1, 0]
  if (a > b) { -> [1, 1]
    a = a + 14; -> [1, 1, 0]
  }
  c = 11; -> [1, 2]
}
c = 123; -> [2]
