In [1]:
import re

# Assuming a column to ID mapping
column_id_mapping = {
    ('partsupp', 'ps_supplycost'): 0
}

def parse_filter(filter_expression):
    print(f"filter_expression: {filter_expression}")
    
    # Compound filter regex (e.g., "(col1 = 5 AND col2 > 3)")
    compound_regex = re.compile(r'\(\s*(.*?)\s*(AND|OR|NOT)\s*(.*?)\s*\)')
    
    # Simple filter regex (e.g., "partsupp.ps_supplycost <= 48.34")
    simple_regex = re.compile(r'(\w+\.\w+)\s*([<>!=]=?|IS NULL|IS NOT NULL)\s*([\d\.\-]+|\'.*?\'|\".*?\")')

    # Check for compound filters (recursively)
    match = compound_regex.match(filter_expression)
    if match:
        left_part = parse_filter(match.group(1).strip())
        right_part = parse_filter(match.group(3).strip())
        return {
            "column": None,
            "operator": match.group(2),
            "literal": None,
            "literal_feature": None,
            "children": [left_part, right_part]
        }

    # Check for simple filters (e.g., "partsupp.ps_supplycost <= 48.34")
    match_simple = simple_regex.match(filter_expression)
    print(f"match_simple: {match_simple}")
    if match_simple:
        column_name = match_simple.group(1)
        operator = match_simple.group(2)
        literal = match_simple.group(3).strip('"\'')

        # Look up the column ID from column_id_mapping
        table_column = tuple(column_name.split('.'))
        column_id = column_id_mapping.get(table_column)

        # Convert literal to appropriate type (int or float)
        try:
            if '.' in literal:
                literal = float(literal)
            else:
                literal = int(literal)
        except ValueError:
            # Keep it as a string if conversion fails
            pass

        return {
            "column": column_id,  # Use the mapped column ID
            "operator": operator,
            "literal": literal,
            "literal_feature": 0,  # Default feature index
            "children": []  # No children for simple filters
        }

    return None  # If unable to parse


# Example usage:
filter_expression = '(partsupp.ps_supplycost <= 48.34356607843329)'
parsed_filter = parse_filter(filter_expression)
print(parsed_filter)


filter_expression: (partsupp.ps_supplycost <= 48.34356607843329)
match_simple: None
None
