<img src="./images/pipe.png" alt="pipe.png" width="700px">

In [0]:
-- Sample data for the pipe-syntax examples: a temporary view holding three (name, age) rows.
CREATE OR REPLACE TEMPORARY VIEW users AS
SELECT
  name,
  age
FROM VALUES
  ('Alice', 31),
  ('Bob', 28),
  ('Cindy', 35)
  AS people (name, age);

In [0]:
-- Pipe syntax: start from the users view, then keep only rows where age > 30.
FROM users
|> WHERE age > 30;

In [0]:
-- Builds a pipe-syntax query from string fragments held in session variables,
-- then runs it with EXECUTE IMMEDIATE.
--
-- DECLARE OR REPLACE is used so this cell can be re-run in the same session;
-- a plain DECLARE fails once the variable already exists.
--
-- NOTE: the fragments are spliced into the SQL text verbatim. Keep them as
-- trusted constants; never feed user-supplied text into base_table or age_filter.

-- 1. Define variables for parts of the query
DECLARE OR REPLACE VARIABLE base_table STRING DEFAULT 'users';
DECLARE OR REPLACE VARIABLE age_filter STRING DEFAULT 'age  > 30';

-- 2. Build the pipeline query string using concatenation
DECLARE OR REPLACE VARIABLE pipeline_sql STRING;
SET VARIABLE pipeline_sql =
  'FROM ' || base_table ||
  ' |> WHERE ' || age_filter ||
  ' |> SELECT name, age';

-- You can inspect the constructed query
SELECT pipeline_sql; -- FROM users |> WHERE age  > 30 |> SELECT name, age

-- 3. Execute the dynamically constructed SQL query
EXECUTE IMMEDIATE pipeline_sql;

In [0]:
%python
class QueryBuilder:
    """Incrementally assemble a SQL pipe-syntax (``|>``) query string.

    The builder starts from a table name and optionally appends a WHERE
    stage and a LIMIT stage, in that order.

    NOTE: the table name, filter and limit are interpolated into the SQL
    text verbatim. Only pass trusted values; do not forward user input.
    """

    def __init__(self, table):
        """Create a builder for ``table`` with no filter and no limit set."""
        self.table = table      # Table name for FROM clause
        self.filter = None      # WHERE condition (optional)
        self.limit = None       # LIMIT value (optional)

    def set_filter(self, condition):
        """Specify a WHERE condition.

        A falsy value (``None`` or ``""``) means no WHERE stage is emitted.
        """
        self.filter = condition

    def set_limit(self, n):
        """Specify a LIMIT for the query.

        ``None`` means no LIMIT stage is emitted; ``0`` is a real limit.
        """
        self.limit = n

    def build_pipeline_sql(self):
        """Assemble the SQL pipeline string based on the set parts.

        Returns:
            str: ``FROM <table>`` followed by the optional
            ``|> WHERE ...`` and ``|> LIMIT ...`` stages.
        """
        query = f"FROM {self.table}"
        if self.filter:
            query += f" |> WHERE {self.filter}"
        # Compare against None rather than truthiness: a limit of 0 is valid
        # and must not be dropped (which would return every row instead).
        if self.limit is not None:
            query += f" |> LIMIT {self.limit}"
        return query

    def execute(self):
        """Build the query, print it, and run it through ``spark.sql``.

        Relies on a ``spark`` name being available in the enclosing scope
        (it is not defined in this class).

        Returns:
            Whatever ``spark.sql`` returns for the built query.
        """
        sql_query = self.build_pipeline_sql()
        print(f"Executing: {sql_query}")
        return spark.sql(sql_query)

# Example usage: build a filtered, limited pipeline over the users view and run it.
qb = QueryBuilder("users")
qb.set_filter("age > 30")
qb.set_limit(10)
result_df = qb.execute()  # This will run: FROM users |> WHERE age > 30 |> LIMIT 10
result_df.show()  # Display the result of the executed query