In [128]:
def match_bracket(source: str, index: int, opening_type: str, closing_type: str) -> tuple((int, int)):
    """
    Find the start and end index in which the first opening bracket is closed without nesting. 
    """
    start = source[index:].find(opening_type) + index + len(opening_type)
    if start == -1:
        return index, index
    end = start
    count = 1
    for i, char in enumerate(source[start:]):
        if char == opening_type:
            count += 1
        elif char == closing_type:
            count -= 1
        if count == 0:
            end = start + i
            break
    return start, end


source = """if ((a < 2) && (b > 1))
{
    b = 1; 
}"""
start, end = match_bracket(source, 0, '(', ')')
assert source[start:end] == '(a < 2) && (b > 1)'
assert start == 4
assert end == 22


In [167]:
import re


def extract_cond_block(source: str, previous_condition: str = '') -> tuple((str, str, str, str, str)):
    """
    Extracts the first if block from the source code.
    Returns a tuple of the code before the block, the condition, the content of the block, and the code after the block.
    """
    indent_regex = re.compile(r'^\s+', flags=re.MULTILINE)
    # cases depending on how user writes the code
    # case1: if statement
    #   case 1a: if followed by {
    #   case 1b: if without a {
    # case2: else statement
    #   case 2a: else followed by {
    #   case 2b: else without a {
    #     case 2b1: else if followed by {
    #     case 2b2: else if without a {
    #     case 2b3: else without an if

    # check whether `if` or `else` comes first in the source code
    if_start = source.find('if')
    else_start = source.find('else')
    if if_start == -1 and else_start == -1:
        return source, '', '', '', ''
    elif if_start == -1:
        return extract_cond_block_helper(source, previous_condition, conditional='else')
    elif else_start == -1:
        return extract_cond_block_helper(source, previous_condition, conditional='if')
    else:
        if if_start < else_start:
            return extract_cond_block_helper(source, previous_condition, conditional='if')
        else:
            return extract_cond_block_helper(source, previous_condition, conditional='else')


def extract_cond_block_helper(source: str, previous_condition: str = '', conditional: str = 'if') -> tuple((str, str, str, str, str)):
    # multiple spaces, tabs, newlines regex
    space_regex = re.compile(r'\s\s+')
    if conditional == 'if':
        condition_start, condition_end = match_bracket(
            source, source.find('if'), '(', ')')
        # replace multiple spaces, tabs, newlines with a single space
        condition = re.sub(
            space_regex, ' ', source[condition_start:condition_end].strip())
        remaining = source[condition_end + len(')'):]
    elif conditional == 'else':
        condition = '!( ' + previous_condition + ' )'
        condition_end = source.find('else')
        remaining = source[condition_end + len('else'):]
    else:
        raise Exception('Invalid type')
    if remaining.lstrip().find('{') == 0:
        # conditional with an {
        content_start, content_end = match_bracket(
            source, condition_end + len(')'), '{', '}')
        content = re.sub(
            space_regex, ' ', source[content_start:content_end].strip())
        return source[:source.find(conditional)].rstrip(), conditional, condition, content, source[content_end + len('}'):].lstrip()
    else:
        # find the next if or else statement, whichever comes first
        next_if = source[condition_end + len(')'):].find('if')
        next_else = source[condition_end + len(')'):].find('else')
        print("No bracket", next_if, next_else)
        if next_if == -1 and next_else == -1:
            content = re.sub(
                space_regex, ' ', source[condition_end + len(')'):].strip())
            return source[:source.find(conditional)].rstrip(), conditional, condition, content, ''
        elif next_if == -1:
            content = re.sub(
                space_regex, ' ', source[condition_end + len(')'):condition_end + len(')') + next_else].strip())
            return source[:source.find(conditional)].rstrip(), conditional, condition, content, source[condition_end + len(')') + next_else:].lstrip()
        elif next_else == -1:
            content = re.sub(
                space_regex, ' ', source[condition_end + len(')'):condition_end + len(')') + next_if].strip())
            return source[:source.find(conditional)].rstrip(), conditional, condition, content, source[condition_end + len(')') + next_if:].lstrip()
        else:
            if next_if < next_else:
                content = re.sub(
                    space_regex, ' ', source[condition_end + len(')'):condition_end + len(')') + next_if].strip())
                return source[:source.find(conditional)].rstrip(), conditional, condition, content, source[condition_end + len(')') + next_if:].lstrip()
            else:
                content = re.sub(
                    space_regex, ' ', source[condition_end + len(')'):condition_end + len(')') + next_else].strip())
                return source[:source.find(conditional)].rstrip(), conditional, condition, content, source[condition_end + len(')') + next_else:].lstrip()

with open('test1.c', 'r') as f:
    source = f.read()
before, conditional, condition, content, after = extract_cond_block(source)
counter = 0
while after.strip() and counter < 10: 
    if before: 
        print(before)
        print("=====================")    
    print('if', condition)
    print(content)
    print("=====================")
    before, conditional, condition, content, after = extract_cond_block(after, condition)
    counter += 1
print(before)

a = 1;
if (a < 2) && (b > 1)
b = 1; c = 2; if (bruh == 1) a = 2;
No bracket 26 51
if !( (a < 2) && (b > 1) )
lse b = 2; a = 3;
No bracket -1 15
if a < 4
b = 3;
if !( a < 4 )
b = 4;
a = 5;
