In [0]:
-- Make `202507` the current schema so the unqualified table, view and
-- procedure names used in the following cells resolve against it.
USE SCHEMA `202507`;

In [0]:
-- Demo fixtures: `orders` holds the live orders and `archived_orders` has the
-- same column layout so rows can be copied across unchanged.
--
-- CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails
-- once the tables exist. NOTE: re-running replaces both tables, discarding
-- whatever rows they currently hold, and reloads the four sample orders.
CREATE OR REPLACE TABLE archived_orders (
    order_id INT,
    order_date DATE,
    customer_id INT,
    amount DECIMAL(10, 2)
);

CREATE OR REPLACE TABLE orders (
    order_id INT,
    order_date DATE,
    customer_id INT,
    amount DECIMAL(10, 2)
);

-- Typed DATE literals avoid relying on implicit string-to-date casting.
-- Two orders fall in 2023 and two in 2025.
INSERT INTO orders (order_id, order_date, customer_id, amount) VALUES
(1, DATE'2023-05-15', 101, 250.00),
(2, DATE'2023-11-20', 102, 150.00),
(3, DATE'2025-02-10', 103, 300.00),
(4, DATE'2025-04-25', 104, 450.00);

In [0]:
%python
from pyspark.sql.functions import expr
mergedDF = (
    spark.table("orders")
    .mergeInto(
        "archived_orders",
        expr("archived_orders.order_id = orders.order_id")
    )
    .whenNotMatched()
    .insertAll()
    .merge()
)

Stored procedures — more details: https://databrickster.medium.com/stored-procedures-in-databricks-95136fb5e273

In [0]:
-- Moves every order dated strictly before `cut_date` out of `orders` and
-- into `archived_orders`.
--
--   cut_date  exclusive upper bound on order_date; orders dated on or after
--             it stay in `orders`.
--
-- OR REPLACE keeps this cell re-runnable; a plain CREATE PROCEDURE fails once
-- the procedure exists.
--
-- NOTE(review): the INSERT and the DELETE are separate statements. If the
-- DELETE fails after the INSERT succeeded, calling the procedure again would
-- archive the same rows a second time -- confirm whether that matters here.
CREATE OR REPLACE PROCEDURE `202507`.archive_old_orders(cut_date DATE)
LANGUAGE SQL
SQL SECURITY INVOKER
AS BEGIN
    -- Columns are named on both sides so the copy does not depend on the two
    -- tables keeping the same column order.
    INSERT INTO archived_orders (order_id, order_date, customer_id, amount)
        SELECT order_id, order_date, customer_id, amount
        FROM orders
        WHERE order_date < cut_date;

    -- Same predicate as the INSERT above: remove exactly the rows just copied.
    DELETE FROM orders WHERE order_date < cut_date;
END;

In [0]:
-- Archive every order dated before 1 January 2024.
CALL archive_old_orders(
    DATE'2024-01-01'
);

Recursive CTE

In [0]:
-- Directed routes between cities, one row per (origin, destination) edge.
-- CREATE OR REPLACE swaps the temporary view in a single statement; a separate
-- DROP VIEW could also remove a permanent view that happens to share the name.
CREATE OR REPLACE TEMPORARY VIEW routes(origin, destination) AS VALUES
  ('New York', 'Washington'),
  ('New York', 'Boston'),
  ('Boston', 'New York'),
  ('Washington', 'Boston'),
  ('Washington', 'Raleigh');

-- Enumerates every cycle-free path that starts in New York.
--   destination  last city on the path
--   path         cities visited so far, in order, starting with 'New York'
--   length       number of route hops taken (0 for the starting row)
WITH RECURSIVE destinations_from_new_york AS (
    -- Anchor: the starting city, reached with zero hops.
    SELECT 'New York' AS destination, ARRAY('New York') AS path, 0 AS length
    UNION ALL
    -- Step: extend each known path by one outgoing route, skipping cities the
    -- path already contains so that cycles (e.g. Boston -> New York) terminate.
    SELECT
        r.destination,
        CONCAT(d.path, ARRAY(r.destination)),
        d.length + 1
    FROM routes AS r
    INNER JOIN destinations_from_new_york AS d
        ON d.destination = r.origin
        AND NOT ARRAY_CONTAINS(d.path, r.destination)
)
SELECT destination, path, length
FROM destinations_from_new_york
-- Shortest paths first; the tie-break makes the output order repeatable.
ORDER BY length, destination;

Allow non-deterministic expressions in UPDATE/INSERT column values for MERGE operations

In [0]:
-- For each order that also exists in archived_orders (same order_id), overwrite
-- its amount with the archived amount plus a random offset from rand().
-- Shows a non-deterministic expression used as a MERGE UPDATE value.
MERGE INTO orders AS live
USING archived_orders AS archived
    ON live.order_id = archived.order_id
WHEN MATCHED THEN
    UPDATE SET live.amount = archived.amount + rand();