In [1]:
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import ArrayType, StringType
# Obtain the Spark session used by every later cell; getOrCreate() reuses an
# already-active session instead of starting a second one.
spark = SparkSession.builder.getOrCreate()

In [10]:
# One-row frame whose single `values` cell holds the list 0..499. The bare
# expression is left as the cell's last line so the notebook displays the
# resulting Spark schema.
spark.createDataFrame(
    pd.DataFrame(
        {'values': [list(range(500))]}
    )
)

DataFrame[values: array<bigint>]

In [16]:
def make_sample_blocks(frame, sample_block_count):
    """Split the `values` array of each row into contiguous sample blocks.

    Adapted from Glow's VariantSampleBlockMaker, see:
    https://github.com/projectglow/glow/blob/354a40a576c7076affb085ff1e5a314f48c02e99/core/src/main/scala/io/projectglow/transformers/blockvariantsandsamples/VariantSampleBlockMaker.scala#L30

    Parameters
    ----------
    frame : pyspark.sql.DataFrame
        Must contain an array column named `values`.
    sample_block_count : int
        Number of blocks to cut each array into; must be >= 1.

    Returns
    -------
    pyspark.sql.DataFrame
        `frame` with three extra columns and one output row per
        (input row, block id) pair:
        - `fractionalSampleBlockSize`: size(values) / sample_block_count
        - `sample_block_id`: 1 .. sample_block_count
        - `values_slice`: the elements of `values` belonging to that block

    Raises
    ------
    ValueError
        If `sample_block_count` is smaller than 1.
    """
    # With a count of 0 the block size is a division by zero, and
    # sequence(1, 0) counts downwards instead of being empty, so reject
    # non-positive counts up front.
    if sample_block_count < 1:
        raise ValueError(
            "sample_block_count must be >= 1, got {!r}".format(sample_block_count)
        )

    return (
        frame.withColumn(
            "fractionalSampleBlockSize",
            F.size(F.col('values')) / sample_block_count
        )
        .withColumn(
            'sample_block_id',
            # explode() emits one row per element of the 1..count sequence.
            F.explode(
                F.sequence(
                    F.lit(1),
                    F.lit(sample_block_count)
                )
            )
        )
        .withColumn(
            'values_slice',
            # Block i covers the 0-based offsets
            #   [round((i - 1) * blockSize), round(i * blockSize)),
            # so each block starts exactly where the previous one ends.
            # slice() is 1-based, hence the "+ 1" on the start position.
            # round() of a double is still a double while slice() takes
            # integer start/length, so cast explicitly rather than relying on
            # implicit double -> int narrowing (stricter ANSI type coercion
            # may reject it).
            F.expr(
                """slice(
                    values,
                    cast(round((sample_block_id - 1) * fractionalSampleBlockSize) as int) + 1,
                    cast(round(sample_block_id * fractionalSampleBlockSize) as int)
                        - cast(round((sample_block_id - 1) * fractionalSampleBlockSize) as int)
                )"""
            )
        )
    )


sampleBlockCount = 3
df = make_sample_blocks(
    spark.createDataFrame(pd.DataFrame({'values': [list(range(10))]})),
    sampleBlockCount,
)
df

DataFrame[values: array<bigint>, fractionalSampleBlockSize: double, sample_block_id: int, values_slice: array<bigint>]

In [18]:
# Print up to 10 rows; truncate=False keeps the full array cells visible.
df.show(n=10, truncate=False)

+------------------------------+-------------------------+---------------+------------+
|values                        |fractionalSampleBlockSize|sample_block_id|values_slice|
+------------------------------+-------------------------+---------------+------------+
|[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]|3.3333333333333335       |1              |[0, 1, 2]   |
|[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]|3.3333333333333335       |2              |[3, 4, 5, 6]|
|[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]|3.3333333333333335       |3              |[7, 8, 9]   |
+------------------------------+-------------------------+---------------+------------+

