In [0]:
-- Measure the source table. The measured size, together with the requested
-- partition size, determines the approximate size of each individual query.
use catalog identifier(:ctrl_catalog);
use schema identifier(:ctrl_schema);

-- Session variable handed to get_table_size as its final argument.
declare or replace variable table_size_mb int;

call get_table_size(
  :src_type,
  :src_catalog,
  :src_schema,
  :src_table,
  table_size_mb
);

-- Display the measured size.
select table_size_mb;

In [0]:
-- Look up the lower and upper bound values of the partition column.
use catalog identifier(:ctrl_catalog);
use schema identifier(:ctrl_schema);

-- Both variables are passed to get_partition_col_bounds as trailing
-- arguments; the bounds are carried as strings.
declare or replace variable partition_col_type string;
declare or replace variable partition_col_bounds struct<lower: string, upper: string>;

call get_partition_col_bounds(
  :src_catalog,
  :src_schema,
  :src_table,
  :partition_col,
  partition_col_type,
  partition_col_bounds
);

-- Display the bounds.
select
  partition_col_bounds.lower,
  partition_col_bounds.upper;

In [0]:
-- Derive the partition count: table size divided by the requested partition
-- size, cast to a whole number, and never fewer than 2.
declare or replace variable num_partitions int;

set variable num_partitions = greatest(
  cast(table_size_mb / :partition_size_mb as int),
  2
);

select num_partitions;

In [0]:
-- Build the list of partitions (one query WHERE clause per partition) and
-- assign each partition to a batch. The batch count is sized so that no batch
-- holds more than 1000 partitions.
declare or replace variable num_batches int;
set variable num_batches = ceiling(num_partitions / 1000);

-- Use the notebook's partition column together with the column type and
-- bounds looked up earlier, instead of a fixed sample column and date range.
-- NOTE(review): the bounds are held as strings; confirm get_partition_list
-- accepts them in that form for every supported column type.
select get_partition_list(
  :partition_col,
  partition_col_type,
  partition_col_bounds.lower,
  partition_col_bounds.upper,
  num_partitions
) as json_list
-- Expand the JSON array into one row per partition.
|> select explode(from_json(json_list, 'array<struct<id:int, where_clause:string>>')) as col
|> select col.id, col.where_clause
-- ntile distributes the partitions, ordered by id, across num_batches batches.
|> extend ntile(num_batches) over (order by id) as batch_number

In [None]:
-- List the distinct batch ids stored in the "<src_table>_partitions" table.
-- The table name is assembled from the notebook parameters, so they are
-- referenced with parameter markers (:name) like the other cells do.
-- NOTE(review): the partition list query names its batch column batch_number;
-- confirm that the stored table really exposes it as batch_id.
select distinct
  batch_id
from identifier(:src_catalog || '.' || :src_schema || '.' || :src_table || '_partitions')
order by batch_id