In [1]:
def match_bracket(source: str, index: int, opening_type: str, closing_type: str) -> tuple((int, int)):
    """
    Find the span enclosed by the first opening bracket at or after ``index``.

    Nested brackets are tracked, so the span ends at the closing bracket that
    balances the first opening one rather than at the first closing bracket.
    Brackets are compared one character at a time, so ``opening_type`` and
    ``closing_type`` are expected to be single characters.

    Args:
        source: Text to search.
        index: Position in ``source`` from which to look for ``opening_type``.
        opening_type: Opening bracket character, e.g. ``'('``.
        closing_type: Closing bracket character, e.g. ``')'``.

    Returns:
        ``(start, end)`` such that ``source[start:end]`` is the text between
        the two brackets (brackets excluded). ``(index, index)`` is returned
        when there is no opening bracket at or after ``index``, and
        ``(start, start)`` when the opening bracket is never closed.
    """
    # Test the raw find() result: adding offsets first would hide the -1.
    opening_pos = source.find(opening_type, index)
    if opening_pos == -1:
        return index, index
    start = opening_pos + len(opening_type)
    depth = 1  # the opening bracket just consumed
    for pos in range(start, len(source)):
        char = source[pos]
        if char == opening_type:
            depth += 1
        elif char == closing_type:
            depth -= 1
        if depth == 0:
            return start, pos
    # Unbalanced input: report an empty span right after the opening bracket.
    return start, start


# Smoke test: the text inside the first balanced pair of parentheses in
# tests/test0.c must equal the stored expected result.
with open('tests/test0.c', 'r') as f:
    source = f.read()

start, end = match_bracket(source, 0, '(', ')')
buf = source[start:end]

with open('tests/test0-result.txt', 'r') as f:
    expected = f.read().strip()
assert expected == buf


In [2]:
import re


def extract_cond_block(source: str, previous_condition: str = '') -> tuple((str, str, str, str, str)):
    """
    Extract the first ``if`` or ``else`` block from the source code.

    Whichever of the two keywords occurs first in ``source`` decides how the
    block is parsed; the actual parsing is done by
    ``extract_cond_block_helper``.

    NOTE: the keywords are located with a plain substring search, so an
    ``if`` or ``else`` embedded in a longer word is matched as well.

    Args:
        source: Source code to scan.
        previous_condition: Condition of the preceding block; forwarded to
            the helper, which negates it for an ``else`` block.

    Returns:
        A 5-tuple ``(before, conditional, condition, content, after)``: the
        code before the block, the keyword found (``'if'`` or ``'else'``),
        the condition, the content of the block, and the code after the
        block. When neither keyword is present the result is
        ``(source, '', '', '', '')``.
    """
    if_start = source.find('if')
    else_start = source.find('else')
    if if_start == -1 and else_start == -1:
        return source, '', '', '', ''
    # 'if' wins when it is the only keyword present or when it comes first.
    if else_start == -1 or (if_start != -1 and if_start < else_start):
        conditional = 'if'
    else:
        conditional = 'else'
    return extract_cond_block_helper(source, previous_condition, conditional=conditional)


def extract_cond_block_helper(source: str, previous_condition: str = '', conditional: str = 'if') -> tuple((str, str, str, str, str)):
    """
    Parse the first ``conditional`` block found in ``source``.

    For ``'if'`` the condition is the text inside the first balanced pair of
    parentheses after the keyword. For ``'else'`` the condition is the
    negation of ``previous_condition``. The body is either a brace-delimited
    block or, when no brace follows, everything up to the next semicolon.
    Runs of two or more whitespace characters in the condition and the body
    are collapsed to a single space.

    Args:
        source: Source code to scan.
        previous_condition: Condition of the preceding block, used to build
            the condition of an ``else`` block.
        conditional: Keyword to look for, ``'if'`` or ``'else'``.

    Returns:
        A 5-tuple ``(before, conditional, condition, content, after)``. If
        the keyword does not occur in ``source`` the result is
        ``(source, '', '', '', '')``.

    Raises:
        ValueError: If ``conditional`` is neither ``'if'`` nor ``'else'``.
    """
    if conditional not in ('if', 'else'):
        raise ValueError('Invalid type')
    # matches runs of two or more spaces, tabs or newlines
    space_regex = re.compile(r'\s\s+')
    # Locate the keyword once so every slice below agrees on its position.
    keyword_start = source.find(conditional)
    if keyword_start == -1:
        return source, '', '', '', ''
    before = source[:keyword_start].rstrip()
    if conditional == 'if':
        condition_start, condition_end = match_bracket(
            source, keyword_start, '(', ')')
        condition = space_regex.sub(
            ' ', source[condition_start:condition_end].strip())
        body_start = condition_end + len(')')
    else:
        condition = '!( ' + previous_condition + ' )'
        body_start = keyword_start + len('else')
    remaining = source[body_start:]
    if remaining.lstrip().startswith('{'):
        # body enclosed in braces
        content_start, content_end = match_bracket(
            source, body_start, '{', '}')
        content = space_regex.sub(
            ' ', source[content_start:content_end].strip())
        after = source[content_end + len('}'):].lstrip()
    else:
        # single-statement body: runs up to the next semicolon
        statement_end = remaining.find(';')
        if statement_end == -1:
            # No terminator: take the rest as the body instead of slicing
            # with -1, which would drop the last character and leave
            # ``after`` non-empty.
            content = space_regex.sub(' ', remaining.strip())
            after = ''
        else:
            content = space_regex.sub(
                ' ', remaining[:statement_end].strip())
            after = remaining[statement_end + len(';'):].lstrip()
    return before, conditional, condition, content, after

# Walk through tests/test1.c one conditional block at a time, rendering each
# block as text, then compare the rendering with the stored expected result.
with open('tests/test1.c', 'r') as f:
    source = f.read()

before, conditional, condition, content, after = extract_cond_block(source)
counter = 0
buf = ""
# The counter caps the walk at 10 blocks so malformed input cannot loop forever.
while counter < 10 and after.strip():
    if before:
        buf += f"{before}\n=====================\n"
    buf += f"if{condition}\n{content}\n=====================\n"
    before, conditional, condition, content, after = extract_cond_block(after, condition)
    counter += 1
buf += f"{before}\n"

with open('tests/test1-result.txt', 'r') as f:
    expected = f.read().strip()
assert buf.strip() == expected