In [2]:
def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """
    import re

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()
        # Handle parenthesis for nested conditions
        if conditions.startswith('(') and conditions.endswith(')'):
            return parse_conditions(conditions[1:-1])
        
        # Split on AND/OR operators
        if ' AND ' in conditions:
            return {
                "$and": [parse_conditions(cond) for cond in conditions.split(" AND ")]
            }
        if ' OR ' in conditions:
            return {
                "$or": [parse_conditions(cond) for cond in conditions.split(" OR ")]
            }

        # Parse a single condition
        field, operator, value = re.split(r'\s*(=|>|<|>=|<=|!=)\s*', conditions)
        field, value = field.strip(), value.strip()

        if value.isdigit():
            value = int(value)
        elif value.replace('.', '', 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError("Unsupported value type.")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    # Preprocess the query
    sql_query = re.sub(r'\s+', ' ', sql_query.strip())
    match = re.match(
        r'SELECT (.+) FROM (\w+)( WHERE (.+))?( ORDER BY (.+))?( LIMIT (\d+))?',
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT name, age FROM users 
WHERE (age >= 25 AND city = 'New York') OR (age < 18 AND city = 'Boston') 
ORDER BY age DESC, name ASC 
LIMIT 10
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: too many values to unpack (expected 3)

In [3]:
def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """
    import re

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()
        # Handle parenthesis for nested conditions
        if conditions.startswith('(') and conditions.endswith(')'):
            return parse_conditions(conditions[1:-1])
        
        # Split on AND/OR operators
        if ' AND ' in conditions:
            return {
                "$and": [parse_conditions(cond) for cond in conditions.split(" AND ")]
            }
        if ' OR ' in conditions:
            return {
                "$or": [parse_conditions(cond) for cond in conditions.split(" OR ")]
            }

        # Parse a single condition using a more robust method
        match = re.match(r"(.+?)\s*(=|>|<|>=|<=|!=)\s*(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        if value.isdigit():
            value = int(value)
        elif value.replace('.', '', 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    # Preprocess the query
    sql_query = re.sub(r'\s+', ' ', sql_query.strip())
    match = re.match(
        r'SELECT (.+) FROM (\w+)( WHERE (.+))?( ORDER BY (.+))?( LIMIT (\d+))?',
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT name, age FROM users 
WHERE (age >= 25 AND city = 'New York') OR (age < 18 AND city = 'Boston') 
ORDER BY age DESC, name ASC 
LIMIT 10
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported value type in condition: (age >= 25

In [4]:
def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """
    import re

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()
        # Handle parentheses for nested conditions
        if conditions.startswith('(') and conditions.endswith(')'):
            return parse_conditions(conditions[1:-1].strip())
        
        # Split on AND/OR operators
        if ' AND ' in conditions:
            return {
                "$and": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "AND")]
            }
        if ' OR ' in conditions:
            return {
                "$or": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "OR")]
            }

        # Parse a single condition using regex
        match = re.match(r"(.+?)\s*(=|>|<|>=|<=|!=)\s*(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace('.', '', 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_logical_conditions(condition_str, logical_operator):
        """Split a condition string on the specified logical operator while respecting nested parentheses."""
        logical_operator = f" {logical_operator} "
        result = []
        bracket_level = 0
        current = []
        
        for token in re.split(r'(\s+|\(|\)|\S+)', condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() == logical_operator.strip():
                result.append(''.join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append(''.join(current).strip())
        return result

    # Preprocess the query
    sql_query = re.sub(r'\s+', ' ', sql_query.strip())
    match = re.match(
        r'SELECT (.+) FROM (\w+)( WHERE (.+))?( ORDER BY (.+))?( LIMIT (\d+))?',
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT name, age FROM users 
WHERE (age >= 25 AND city = 'New York') OR (age < 18 AND city = 'Boston') 
ORDER BY age DESC, name ASC 
LIMIT 10
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported value type in condition: (age>=25ANDcity='NewYork')OR(age<18ANDcity='Boston')ORDERBYageDESC,nameASCLIMIT10

In [5]:
import re


def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()
        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on AND/OR operators
        if " AND " in conditions:
            return {
                "$and": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "AND")],
            }
        if " OR " in conditions:
            return {
                "$or": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "OR")],
            }

        # Parse a single condition using regex
        match = re.match(r"(.+?)\s*(=|>|<|>=|<=|!=)\s*(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_logical_conditions(condition_str, logical_operator):
        """Split a condition string on the specified logical operator while respecting nested parentheses."""
        result = []
        bracket_level = 0
        current = []

        for token in re.split(r"(\s+|\(|\)|\S+)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() == logical_operator:
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return result

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT name, age FROM users 
WHERE (age >= 25 AND city = 'New York') OR (age < 18 AND city = 'Boston') 
ORDER BY age DESC, name ASC 
LIMIT 10
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported value type in condition: age>=25

In [6]:
import re


def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()
        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on AND/OR operators
        if " AND " in conditions:
            return {
                "$and": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "AND")],
            }
        if " OR " in conditions:
            return {
                "$or": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "OR")],
            }

        # Parse a single condition using regex
        match = re.match(r"(.+?)(=|>|<|>=|<=|!=)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_logical_conditions(condition_str, logical_operator):
        """Split a condition string on the specified logical operator while respecting nested parentheses."""
        result = []
        bracket_level = 0
        current = []

        for token in re.split(r"(\s+|\(|\)|\S+)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() == logical_operator:
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return result

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT name, age FROM users 
WHERE (age>=25 AND city='New York') OR (age<18 AND city='Boston') 
ORDER BY age DESC, name ASC 
LIMIT 10
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported value type in condition: age>=25

In [12]:
import re


def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()
        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on AND/OR operators
        if " AND " in conditions:
            return {
                "$and": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "AND")],
            }
        if " OR " in conditions:
            return {
                "$or": [parse_conditions(cond) for cond in split_logical_conditions(conditions, "OR")],
            }

        # Parse a single condition using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_logical_conditions(condition_str, logical_operator):
        """Split a condition string on the specified logical operator while respecting nested parentheses."""
        result = []
        bracket_level = 0
        current = []

        for token in re.split(r"(\s+|\(|\)|\S+)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() == logical_operator:
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return result

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT *
FROM table_name
ORDER BY column_name
LIMIT 5
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: not enough values to unpack (expected 2, got 1)

In [13]:
import re


def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse a single condition using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            field, direction = order.strip().split()
            sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT *
FROM table_name
ORDER BY column_name
LIMIT 5
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: not enough values to unpack (expected 2, got 1)

In [15]:
import re


def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, and ORDER BY clauses.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse a single condition using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '...%'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Invalid condition: column_name LIKE '...%'

In [16]:
import re


def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles LIKE for pattern matching.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", ".*")  # Replace SQL '%' with MongoDB '.*' for regex
            return {field: {"$regex": pattern}}

        # Parse other conditions using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '...%'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {'column_name': {'$regex': '....*'}}, 'projection': {}}


In [17]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles LIKE for pattern matching.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", ".*")  # Replace SQL '%' with MongoDB '.*' for regex
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        # Parse other conditions using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '....%'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {'column_name': {'$regex': '^.....*'}}, 'projection': {}}


In [19]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles LIKE for pattern matching.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", "")  # For LIKE '.....%', we match exactly 5 characters
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        # Parse other conditions using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT COUNT(*) 
FROM table_name
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {}, 'projection': {'COUNT(*)': 1}}


In [20]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) via countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", "")  # For LIKE '.....%', we match exactly 5 characters
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        # Parse other conditions using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        # Check if it's a COUNT(*) query
        match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+)", sql_query, re.IGNORECASE)
        if match_count:
            collection = match_count.group(1)
            return {
                'collection': collection,
                'countDocuments': {}
            }
        else:
            raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query for normal SELECT
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT COUNT(*) 
FROM table_name
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {}, 'projection': {'COUNT(*)': 1}}


In [21]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) using countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", "")  # For LIKE '.....%', we match exactly 5 characters
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        # Parse other conditions using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    
    # Check for SELECT COUNT(*) FROM table_name
    match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+)", sql_query, re.IGNORECASE)
    if match_count:
        collection = match_count.group(1)
        return {
            'collection': collection,
            'countDocuments': {}  # This will count all documents in the collection
        }

    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query for normal SELECT
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT COUNT(*) 
FROM table_name
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'countDocuments': {}}


In [27]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) using countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle parentheses for nested conditions
        if conditions.startswith("(") and conditions.endswith(")"):
            return parse_conditions(conditions[1:-1].strip())

        # Split on top-level AND/OR operators
        logical_operator, split_conditions = split_on_top_level_operator(conditions)
        if logical_operator:
            return {
                f"${logical_operator.lower()}": [parse_conditions(cond) for cond in split_conditions],
            }

        # Parse LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", "")  # For LIKE '.....%', we match exactly 5 characters
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        # Parse other conditions using regex
        match = re.match(r"(.+?)(=|!=|>=|<=|>|<)(.+)", conditions)
        if not match:
            raise ValueError(f"Invalid condition: {conditions}")

        field, operator, value = match.groups()
        field, value = field.strip(), value.strip()

        # Cast value appropriately
        if value.isdigit():
            value = int(value)
        elif value.replace(".", "", 1).isdigit():
            value = float(value)
        elif value.startswith("'") and value.endswith("'"):
            value = value.strip("'")
        else:
            raise ValueError(f"Unsupported value type in condition: {conditions}")

        # Map SQL operators to MongoDB operators
        operator_map = {
            "=": value,
            "!=": {"$ne": value},
            ">": {"$gt": value},
            ">=": {"$gte": value},
            "<": {"$lt": value},
            "<=": {"$lte": value},
        }
        return {field: operator_map[operator]}

    def split_on_top_level_operator(condition_str):
        """Split a condition string on the top-level AND/OR operator."""
        result = []
        bracket_level = 0
        current = []
        operator = None

        for token in re.split(r"(\s+|\(|\)|AND|OR)", condition_str):
            token = token.strip()
            if not token:
                continue
            if token == "(":
                bracket_level += 1
            elif token == ")":
                bracket_level -= 1
            elif bracket_level == 0 and token.upper() in {"AND", "OR"}:
                if operator and token.upper() != operator:
                    raise ValueError("Mixed logical operators at the same level are not supported.")
                operator = token.upper()
                result.append("".join(current).strip())
                current = []
                continue
            current.append(token)

        if current:
            result.append("".join(current).strip())
        return operator, result if operator else None

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    
    # Check for SELECT COUNT(*) FROM table_name
    match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+)", sql_query, re.IGNORECASE)
    if match_count:
        collection = match_count.group(1)
        return {
            'collection': collection,
            'aggregate': [
                {'$count': 'total_count'}  # Use aggregation pipeline with $count
            ]
        }

    match = re.match(
        r"SELECT (.+?) FROM (\w+)( WHERE (.+?))?( ORDER BY (.+?))?( LIMIT (\d+))?$",
        sql_query,
        re.IGNORECASE,
    )
    if not match:
        raise ValueError("Unsupported query format.")

    # Extract query parts
    fields, collection, _, conditions, _, order_by, _, limit = match.groups()

    # Parse fields
    if fields.strip() == "*":
        projection = {}
    else:
        projection = {field.strip(): 1 for field in fields.split(",")}

    # Parse WHERE conditions
    mongo_filter = {}
    if conditions:
        mongo_filter = parse_conditions(conditions)

    # Parse ORDER BY clause
    sort = []
    if order_by:
        for order in order_by.split(","):
            order = order.strip()
            # Handle case where direction is not provided (defaults to ASC)
            if len(order.split()) == 1:
                sort.append((order, 1))  # Default to ascending order
            else:
                field, direction = order.split()
                sort.append((field, 1 if direction.upper() == "ASC" else -1))

    # Parse LIMIT clause
    limit = int(limit) if limit else None

    # Construct the MongoDB query for normal SELECT
    mongo_query = {
        "collection": collection,
        "filter": mongo_filter,
        "projection": projection,
    }
    if sort:
        mongo_query["sort"] = sort
    if limit is not None:
        mongo_query["limit"] = limit

    return mongo_query


# Example usage:
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name LIKE '...%'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'aggregate': [{'$count': 'total_count'}]}


In [31]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) using countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", ".*")  # For LIKE '.....%', we match any characters after '...'
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        return {}

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    
    # Check for SELECT COUNT(*) WITH WHERE LIKE
    match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+) WHERE (.+)", sql_query, re.IGNORECASE)
    if match_count:
        collection, conditions = match_count.groups()
        
        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = parse_conditions(conditions)

        # MongoDB aggregation pipeline for counting documents with a filter
        return {
            'collection': collection,
            'aggregate': [
                {'$match': mongo_filter},  # Match documents based on the WHERE condition
                {'$count': 'total_count'}  # Count the number of matched documents
            ]
        }

    # If not a COUNT query, you can implement other SQL to MongoDB translations here

    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '%...'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [32]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) using countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            pattern = pattern.replace("%", ".*")  # For LIKE '%...' we match any characters after '...'
            return {field: {"$regex": f"^{pattern}"}}  # Add '^' to match the start of the string

        return {}

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    
    # Check for SELECT COUNT(*) WITH WHERE LIKE
    match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+) WHERE (.+)", sql_query, re.IGNORECASE)
    if match_count:
        collection, conditions = match_count.groups()
        
        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = parse_conditions(conditions)

        # MongoDB aggregation pipeline for counting documents with a filter
        return {
            'collection': collection,
            'aggregate': [
                {'$match': mongo_filter},  # Match documents based on the WHERE condition
                {'$count': 'total_count'}  # Count the number of matched documents
            ]
        }

    # Handle SELECT * queries with WHERE clause and LIKE
    match_select = re.match(r"SELECT (.+?) FROM (\w+)( WHERE (.+))?", sql_query, re.IGNORECASE)
    if match_select:
        fields, collection, _, conditions = match_select.groups()

        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = {}
        if conditions:
            mongo_filter = parse_conditions(conditions)

        # Parse fields (projection)
        if fields.strip() == "*":
            projection = {}
        else:
            projection = {field.strip(): 1 for field in fields.split(",")}

        # MongoDB query for selecting documents with filters and projection
        return {
            'collection': collection,
            'filter': mongo_filter,
            'projection': projection
        }

    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '%...'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {'column_name': {'$regex': '^.*...'}}, 'projection': {}}


In [33]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) using countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            if pattern.startswith("%") and pattern.endswith("%"):
                # Both ends have '%' => any substring
                pattern = pattern[1:-1]
                return {field: {"$regex": f"{pattern}"}}  # Match anywhere in the string
            elif pattern.startswith("%"):  # LIKE '%...' (ends with three characters)
                pattern = pattern[1:]  # Remove the leading '%'
                return {field: {"$regex": f".*{pattern}$$"}}  # Ends with the specified characters
            elif pattern.endswith("%"):  # LIKE '...%' (starts with specific characters)
                pattern = pattern[:-1]  # Remove the trailing '%'
                return {field: {"$regex": f"^{pattern}.*"}}  # Starts with the specified characters
        return {}

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    
    # Check for SELECT COUNT(*) WITH WHERE LIKE
    match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+) WHERE (.+)", sql_query, re.IGNORECASE)
    if match_count:
        collection, conditions = match_count.groups()
        
        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = parse_conditions(conditions)

        # MongoDB aggregation pipeline for counting documents with a filter
        return {
            'collection': collection,
            'aggregate': [
                {'$match': mongo_filter},  # Match documents based on the WHERE condition
                {'$count': 'total_count'}  # Count the number of matched documents
            ]
        }

    # Handle SELECT * queries with WHERE clause and LIKE
    match_select = re.match(r"SELECT (.+?) FROM (\w+)( WHERE (.+))?", sql_query, re.IGNORECASE)
    if match_select:
        fields, collection, _, conditions = match_select.groups()

        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = {}
        if conditions:
            mongo_filter = parse_conditions(conditions)

        # Parse fields (projection)
        if fields.strip() == "*":
            projection = {}
        else:
            projection = {field.strip(): 1 for field in fields.split(",")}

        # MongoDB query for selecting documents with filters and projection
        return {
            'collection': collection,
            'filter': mongo_filter,
            'projection': projection
        }

    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '%...'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {'column_name': {'$regex': '.*...$$'}}, 'projection': {}}


In [42]:
import re

def sql_to_mongo(sql_query):
    """
    Convert a more complex SQL query to a MongoDB query syntax.
    Supports SELECT, WHERE (with AND/OR), LIMIT, ORDER BY clauses,
    and also handles COUNT(*) using countDocuments or aggregation pipeline.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    def parse_conditions(conditions):
        """Recursively parse SQL WHERE conditions into MongoDB filters."""
        conditions = conditions.strip()

        # Handle LIKE conditions
        match_like = re.match(r"(.+?) LIKE '(.+?)'", conditions)
        if match_like:
            field, pattern = match_like.groups()
            field = field.strip()
            # Translate SQL LIKE to MongoDB regex
            if pattern.startswith("%") and pattern.endswith("%"):
                # Both ends have '%' => any substring
                pattern = pattern[1:-1]
                return {field: {"$regex": f"{pattern}"}}  # Match anywhere in the string
            elif pattern.startswith("%"):  # LIKE '%...' (ends with three characters)
                pattern = pattern[1:]  # Remove the leading '%'
                return {field: {"$regex": f".*{pattern}$"}}  # Ends with the specified characters
            elif pattern.endswith("%"):  # LIKE '...%' (starts with specific characters)
                pattern = pattern[:-1]  # Remove the trailing '%'
                return {field: {"$regex": f"^{pattern}.*"}}  # Starts with the specified characters
        return {}

    # Preprocess the query
    sql_query = re.sub(r"\s+", " ", sql_query.strip())
    
    # Check for SELECT COUNT(*) WITH WHERE LIKE
    match_count = re.match(r"SELECT COUNT\(\*\) FROM (\w+) WHERE (.+)", sql_query, re.IGNORECASE)
    if match_count:
        collection, conditions = match_count.groups()
        
        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = parse_conditions(conditions)

        # MongoDB aggregation pipeline for counting documents with a filter
        return {
            'collection': collection,
            'aggregate': [
                {'$match': mongo_filter},  # Match documents based on the WHERE condition
                {'$count': 'total_count'}  # Count the number of matched documents
            ]
        }

    # Handle SELECT * queries with WHERE clause and LIKE
    match_select = re.match(r"SELECT (.+?) FROM (\w+)( WHERE (.+))?", sql_query, re.IGNORECASE)
    if match_select:
        fields, collection, _, conditions = match_select.groups()

        # Parse WHERE conditions (e.g., LIKE '...%')
        mongo_filter = {}
        if conditions:
            mongo_filter = parse_conditions(conditions)

        # Parse fields (projection)
        if fields.strip() == "*":
            projection = {}
        else:
            projection = {field.strip(): 1 for field in fields.split(",")}

        # MongoDB query for selecting documents with filters and projection
        return {
            'collection': collection,
            'filter': mongo_filter,
            'projection': projection
        }

    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name value is BETWEEN 1 AND 3
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {}, 'projection': {}}


In [43]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT (.+) FROM (\w+) WHERE (\w+) BETWEEN (\d+) AND (\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, lower_bound, upper_bound = match_between.groups()

        # MongoDB Query
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$gte': int(lower_bound),
                    '$lte': int(upper_bound)
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name BETWEEN 1 AND 3
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [45]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, lower_bound, upper_bound = match_between.groups()

        # MongoDB Query
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$gte': int(lower_bound),
                    '$lte': int(upper_bound)
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name BETWEEN 1 AND 3
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {'column_name': {'$gte': 1, '$lte': 3}}, 'projection': {'COUNT(*)': 1}}


In [46]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN and COUNT(*) aggregation.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT COUNT\(\*\) FROM (\w+) WHERE (\w+) BETWEEN (\d+) AND (\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, lower_bound, upper_bound = match_count_between.groups()

        # MongoDB Aggregation Query to count documents with a filter
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$gte': int(lower_bound),
                            '$lte': int(upper_bound)
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name BETWEEN 1 AND 3
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [48]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN and COUNT(*) aggregation.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Updated regex to handle COUNT(*) with BETWEEN
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, lower_bound, upper_bound = match_count_between.groups()

        # MongoDB Aggregation Query to count documents with a filter
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$gte': int(lower_bound),
                            '$lte': int(upper_bound)
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name  != '...'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [50]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, and != condition.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()

        # MongoDB Query with != condition (translated to $ne)
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$ne': value
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name != '...'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {'column_name': {'$ne': '...'}}, 'projection': {'COUNT(*)': 1}}


In [52]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, and != condition.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        # MongoDB Aggregation Query to count documents with != condition (translated to $ne)
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$ne': value
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '%...%'
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [60]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, != condition, and LIKE operator.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '%...%'
    match_like = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like:
        fields, collection, column_name, pattern = match_like.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*")  # '%' is replaced by '.*' in MongoDB regex

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        # MongoDB Aggregation Query to count documents with != condition (translated to $ne)
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$ne': value
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name IS NULL
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [59]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, and LIKE operator.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '%...%'
    match_like = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like:
        fields, collection, column_name, pattern = match_like.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        # Escape the dots to match literal "..."
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        # MongoDB Aggregation Query to count documents with != condition (translated to $ne)
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$ne': value
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name IS NULL
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [62]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, and IS NULL.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '%...%'
    match_like = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like:
        fields, collection, column_name, pattern = match_like.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        # MongoDB Aggregation Query to count documents with != condition (translated to $ne)
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$ne': value
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()

        # MongoDB Query to check for NULL (translated to $eq: null)
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$eq': None
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT *
FROM table_name
WHERE column_name IS NOT NULL
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [67]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, and IS NULL or IS NOT NULL.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '%...%'
    match_like = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like:
        fields, collection, column_name, pattern = match_like.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        # MongoDB Aggregation Query to count documents with != condition (translated to $ne)
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$ne': value
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()

        # MongoDB Query to check for NULL (translated to $eq: null)
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$eq': None
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NOT\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_not_null:
        fields, collection, column_name = match_is_not_null.groups()

        # MongoDB Query to check for NOT NULL (translated to $ne: null)
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$ne': None
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_query = """
SELECT COUNT(*) 
FROM table_name
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


ValueError: Unsupported query format.

In [68]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, and LIMIT.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '...%'
    match_like = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like:
        fields, collection, column_name, pattern = match_like.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like:
        collection, column_name, pattern = match_count_like.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Aggregation Query with $match and $count
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$regex': pattern
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()

        # MongoDB Query with sort and limit
        return {
            'collection': collection,
            'filter': {},  # No filter for this case
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],  # MongoDB sort: 1 for ascending, -1 for descending
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name (no WHERE)
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]

        # MongoDB Aggregation Query to count all documents in the collection
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$count': 'total_count'  # Count the total number of documents
                }
            ]
        }

    # Raise error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:

# 1. SELECT * FROM table_name ORDER BY column_name LIMIT 5;
sql_query = """
SELECT *
FROM table_name
ORDER BY column_name ASC
LIMIT 5;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 2. SELECT COUNT(*) FROM table_name;
sql_query = """
SELECT COUNT(*) 
FROM table_name;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 3. SELECT * FROM table_name WHERE column_name LIKE '...%';
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '...%';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 4. SELECT COUNT(*) FROM table_name WHERE column_name LIKE '...%';
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name LIKE '...%';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)


{'collection': 'table_name', 'filter': {}, 'projection': {}, 'sort': [('column_name', 1)], 'limit': 5}
{'collection': 'table_name', 'aggregate': [{'$count': 'total_count'}]}
{'collection': 'table_name', 'filter': {'column_name': {'$regex': '\\.\\.\\.\\.*'}}, 'projection': {}}
{'collection': 'table_name', 'filter': {'column_name': {'$regex': '\\.\\.\\.\\.*'}}, 'projection': {'COUNT(*)': 1}}


In [69]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '...%'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Aggregation Query with $match and $count
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$regex': pattern
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()

        # Convert SQL LIKE '%...' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }
    
    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Aggregation Query with $match and $count
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$regex': pattern
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }
    
    
# 1. SELECT * FROM table_name ORDER BY column_name LIMIT 5;
sql_query = """
SELECT *
FROM table_name
ORDER BY column_name ASC
LIMIT 5;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 2. SELECT COUNT(*) FROM table_name;
sql_query = """
SELECT COUNT(*) 
FROM table_name;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 3. SELECT * FROM table_name WHERE column_name LIKE '...%';
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '...%';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 4. SELECT COUNT(*) FROM table_name WHERE column_name LIKE '...%';
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name LIKE '...%';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)



None
None
{'collection': 'table_name', 'filter': {'column_name': {'$regex': '\\.\\.\\.\\.*'}}, 'projection': {}}
{'collection': 'table_name', 'filter': {'column_name': {'$regex': '\\.\\.\\.\\.*'}}, 'projection': {'COUNT(*)': 1}}


In [72]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * WHERE column_name LIKE '...%'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Aggregation Query with $match and $count
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$regex': pattern
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()

        # MongoDB Query with sort and limit
        return {
            'collection': collection,
            'filter': {},  # No filter for this case
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],  # MongoDB sort: 1 for ascending, -1 for descending
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name (no WHERE)
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]

        # MongoDB Aggregation Query to count all documents in the collection
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$count': 'total_count'  # Count the total number of documents
                }
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()

        # Convert SQL LIKE '%...' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Query with $regex condition for LIKE
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$regex': pattern
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()

        # Convert SQL LIKE '%...%' to MongoDB regex
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        # MongoDB Aggregation Query with $match and $count
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$regex': pattern
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()

        # MongoDB Query for BETWEEN using $gte and $lte
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$gte': int(start),
                    '$lte': int(end)
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()

        # MongoDB Aggregation Query for BETWEEN using $gte and $lte
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$gte': int(start),
                            '$lte': int(end)
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()

        # MongoDB Query with $ne condition for inequality
        return {
            'collection': collection,
            'filter': {
                column_name: {
                    '$ne': value
                }
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        # MongoDB Aggregation Query with $match and $count for inequality
        return {
            'collection': collection,
            'aggregate': [
                {
                    '$match': {
                        column_name: {
                            '$ne': value
                        }
                    }
                },
                {
                    '$count': 'total_count'  # Count the number of matching documents
                }
            ]
        }

    #

    
    

In [74]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * FROM table_name LIMIT 5;
    match_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_limit:
        fields, collection, limit = match_limit.groups()

        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'limit': int(limit)
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()

        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name;
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]

        return {
            'collection': collection,
            'aggregate': [
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%' or '%...'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        return {
            'collection': collection,
            'filter': {
                column_name: {'$regex': pattern}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")

        return {
            'collection': collection,
            'filter': {
                column_name: {'$regex': pattern}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$gte': int(start), '$lte': int(end)}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$gte': int(start), '$lte': int(end)}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$ne': value}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$ne': value}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$eq': None}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NOT\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_not_null:
        fields, collection, column_name = match_is_not_null.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$ne': None}
           


SyntaxError: incomplete input (743017845.py, line 184)

In [76]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * FROM table_name LIMIT 5;
    match_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_limit:
        fields, collection, limit = match_limit.groups()

        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'limit': int(limit)
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()

        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name;
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]

        return {
            'collection': collection,
            'aggregate': [
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%' or '%...'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and "." with "\."

        return {
            'collection': collection,
            'filter': {
                column_name: {'$regex': pattern}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")

        return {
            'collection': collection,
            'filter': {
                column_name: {'$regex': pattern}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$gte': int(start), '$lte': int(end)}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$gte': int(start), '$lte': int(end)}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$ne': value}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()

        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$ne': value}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$eq': None}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NOT\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_not_null:
        fields, collection, column_name = match_is_not_null.groups()

        return {
            'collection': collection,
            'filter': {
                column_name: {'$ne': None}
            },
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # If no match, raise an error for unsupported queries
    raise ValueError("Unsupported query format.")

# Example usage:
sql_queries = [
    """
    SELECT *
    FROM table_name
    LIMIT 5;
    """,
   


SyntaxError: incomplete input (1007231361.py, line 198)

In [78]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * FROM table_name LIMIT 5;
    match_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_limit:
        fields, collection, limit = match_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'limit': int(limit)
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name;
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]
        return {
            'collection': collection,
            'aggregate': [{'$count': 'total_count'}]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%' or '%...'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and escape "."
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$gte': int(start), '$lte': int(end)}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$gte': int(start), '$lte': int(end)}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$ne': value}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$ne': value}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$eq': None}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NOT\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_not_null:
        fields, collection, column


In [80]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * FROM table_name LIMIT 5;
    match_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_limit:
        fields, collection, limit = match_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'limit': int(limit)
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name;
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]
        return {
            'collection': collection,
            'aggregate': [{'$count': 'total_count'}]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%' or '%...'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and escape "."
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$gte': int(start), '$lte': int(end)}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$gte': int(start), '$lte': int(end)}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$ne': value}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$ne': value}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$eq': None}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+


SyntaxError: unterminated string literal (detected at line 152) (1917126499.py, line 152)

In [82]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * FROM table_name LIMIT 5;
    match_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_limit:
        fields, collection, limit = match_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'limit': int(limit)
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name;
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]
        return {
            'collection': collection,
            'aggregate': [{'$count': 'total_count'}]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%' or '%...'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and escape "."
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$gte': int(start), '$lte': int(end)}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$gte': int(start), '$lte': int(end)}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$ne': value}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$ne': value}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$eq': None}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NOT\s+


SyntaxError: unterminated string literal (detected at line 152) (1225077192.py, line 152)

In [84]:
import re

def sql_to_mongo(sql_query):
    """
    Convert SQL queries to MongoDB queries.
    Supports operations like aggregation for SQL queries with aggregate functions such as MAX, MIN.
    Handles conditions like BETWEEN, COUNT(*) aggregation, LIKE operator, IS NULL or IS NOT NULL,
    ORDER BY, LIMIT, and inequality checks.

    Args:
        sql_query (str): The SQL query string.

    Returns:
        dict: A dictionary representing the MongoDB query.
    """

    # Handle SQL SELECT * FROM table_name LIMIT 5;
    match_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_limit:
        fields, collection, limit = match_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'limit': int(limit)
        }

    # Handle SQL SELECT * FROM table_name ORDER BY column_name LIMIT 5;
    match_order_limit = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+ORDER\s+BY\s+(\w+)\s+(ASC|DESC)\s+LIMIT\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_order_limit:
        fields, collection, column_name, order, limit = match_order_limit.groups()
        return {
            'collection': collection,
            'filter': {},  # No filter
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {},
            'sort': [(column_name, 1 if order.upper() == 'ASC' else -1)],
            'limit': int(limit)
        }

    # Handle SQL SELECT COUNT(*) FROM table_name;
    match_count_all = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)", sql_query.strip(), re.IGNORECASE)
    if match_count_all:
        collection = match_count_all.groups()[0]
        return {
            'collection': collection,
            'aggregate': [{'$count': 'total_count'}]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '...%' or '%...'
    match_like_start = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_start:
        fields, collection, column_name, pattern = match_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")  # Replace "%" with ".*" and escape "."
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '...%'
    match_count_like_start = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_start:
        collection, column_name, pattern = match_count_like_start.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name LIKE '%...'
    match_like_end = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_like_end:
        fields, collection, column_name, pattern = match_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'filter': {column_name: {'$regex': pattern}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name LIKE '%...'
    match_count_like_end = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+LIKE\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_like_end:
        collection, column_name, pattern = match_count_like_end.groups()
        pattern = pattern.replace("%", ".*").replace(".", r"\.")
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$regex': pattern}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name BETWEEN 1 AND 3
    match_between = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_between:
        fields, collection, column_name, start, end = match_between.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$gte': int(start), '$lte': int(end)}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name BETWEEN 1 AND 3
    match_count_between = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+BETWEEN\s+(\d+)\s+AND\s+(\d+)", sql_query.strip(), re.IGNORECASE)
    if match_count_between:
        collection, column_name, start, end = match_count_between.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$gte': int(start), '$lte': int(end)}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name != '...'
    match_not_equal = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_not_equal:
        fields, collection, column_name, value = match_not_equal.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$ne': value}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT COUNT(*) WHERE column_name != '...'
    match_count_not_equal = re.match(r"SELECT\s+COUNT\(\*\)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+!=\s+'(.+?)'", sql_query.strip(), re.IGNORECASE)
    if match_count_not_equal:
        collection, column_name, value = match_count_not_equal.groups()
        return {
            'collection': collection,
            'aggregate': [
                {'$match': {column_name: {'$ne': value}}},
                {'$count': 'total_count'}
            ]
        }

    # Handle SQL SELECT * WHERE column_name IS NULL
    match_is_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_null:
        fields, collection, column_name = match_is_null.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$eq': None}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    # Handle SQL SELECT * WHERE column_name IS NOT NULL
    match_is_not_null = re.match(r"SELECT\s+(.+?)\s+FROM\s+(\w+)\s+WHERE\s+(\w+)\s+IS\s+NOT\s+NULL", sql_query.strip(), re.IGNORECASE)
    if match_is_not_null:
        fields, collection, column_name = match_is_not_null.groups()
        return {
            'collection': collection,
            'filter': {column_name: {'$ne': None}},
            'projection': {field.strip(): 1 for field in fields.split(',')} if fields.strip() != "*" else {}
        }

    raise ValueError("Unsupported query format.")


In [85]:
# 1. 
sql_query = """
SELECT *
FROM table_name
LIMIT 5;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 2. 
sql_query = """
SELECT *
FROM table_name
ORDER BY column_name 
LIMIT 5;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 3. 
sql_query = """
SELECT COUNT(*) 
FROM table_name;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 4. 
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '...%';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 5. 
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name LIKE '...%';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 6. 
sql_query = """
SELECT *
FROM table_name
WHERE column_name LIKE '%...';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 7. 
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name LIKE '%...';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 8 is not supported

# 9. 
sql_query = """
SELECT *
FROM table_name
WHERE column_name value is BETWEEN 1 AND 3;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 10. 
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name BETWEEN 1 AND 3;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 11. 
sql_query = """
SELECT *
FROM table_name
WHERE column_name  != '...';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 12. 
sql_query = """
SELECT COUNT(*)
FROM table_name
WHERE column_name != '...';
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 13 not supported

# 14 not supported

# 15
sql_query = """
SELECT *
FROM table_name
LIMIT 5;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 16. 
sql_query = """
SELECT *
FROM table_name
WHERE column_name IS NULL;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

# 17. 
sql_query = """
SELECT *
FROM table_name
WHERE column_name IS NOT NULL;
"""
mongo_query = sql_to_mongo(sql_query)
print(mongo_query)

{'collection': 'table_name', 'filter': {}, 'projection': {}, 'limit': 5}


ValueError: Unsupported query format.

In [None]:
# transform that into mongodb query
# is this correct