In [0]:
# Install the dependencies listed in ../requirements.txt, then OpenCV on top of them.
# NOTE(review): opencv-python is installed without a version pin — consider pinning it
# (or moving it into requirements.txt) so that re-runs resolve the same build.
%pip install -r ../requirements.txt
%pip install opencv-python

In [0]:
# Restart the Python process so the packages installed by the %pip cell above are
# the ones picked up by the imports that follow.
dbutils.library.restartPython()

In [0]:
from dbx.pixels import Catalog
from dbx.pixels.dicom import DicomMetaExtractor
from dbx.pixels.dicom.redactor import Redactor
from pyspark.sql.functions import expr, explode, col, current_timestamp, lit

In [0]:
# Locations used by the rest of the notebook, given as three-part dotted names.
# NOTE(review): these are hard-coded to one specific catalog/schema — adjust before
# running in another workspace.
table = 'ema_rina.pixels_solacc_gehc.object_catalog'  # later cells also use table + "_redaction"
volume = 'ema_rina.pixels_solacc_gehc.pixels_volume'
dest_base_path = '/redacted'  # handed to the redactor as its dest_base_path

# Catalog handle bound to the table and volume above; later cells read its
# _volume_path to build checkpoint locations and call its save() method.
catalog = Catalog(spark, table=table, volume=volume)

In [0]:
# Launch the redactor against the "<table>_redaction" table and wait for the
# resulting query to terminate before moving on to the next cell.
redactor = Redactor(spark=spark)

# Inputs for the run, named up front so the call below reads as a plain mapping.
redaction_source = table + "_redaction"
redactor_checkpoint = f"{catalog._volume_path}/_checkpoints/redactor/"

query = redactor.process_from_table(
    source_table=redaction_source,
    volume=volume,
    dest_base_path=dest_base_path,
    checkpoint_location=redactor_checkpoint,
    # NOTE(review): presumably processes what is currently available and then
    # stops, which is what lets awaitTermination() return — confirm in Redactor.
    trigger_available_now=True,
)

# Block this cell until the query above has finished.
query.awaitTermination()

In [0]:
# Call catalog() in streaming mode with a dedicated checkpoint base path. The
# DataFrame it returns is not used.
# NOTE(review): presumably this call is made only for its effect on the catalog
# object, so that catalog.save() at the end of the cell writes as a stream using
# this checkpoint — confirm against the Catalog implementation.
catalog.catalog(
    path=catalog._volume_path,
    streaming=True,
    streamCheckpointBasePath=f"{catalog._volume_path}/_checkpoints/redactor_catalog/",
)

# Stream the change feed of the redaction table, starting from version 0.
change_feed = (
    spark.readStream
    .option("readChangeFeed", "true")
    .option("startingVersion", "0")
    .table(table+"_redaction")
)

# Keep only the post-update image of rows whose status is SUCCESS.
succeeded_updates = (
    change_feed
    .filter("_change_type = 'update_postimage'")
    .filter("status = 'SUCCESS'")
)

# One row per output file. nullif() turns an empty path into NULL, concat() with
# a NULL argument yields NULL, and the final where() drops those rows.
redacted_df = (
    succeeded_updates
    .withColumn('file_path', explode(col("output_file_paths")))
    .selectExpr("concat('dbfs:', nullif(file_path, '')) as path")
    .where('path is not null')
)

# Add the path-derived catalog columns, then extract DICOM metadata per file.
catalog_df = Catalog._with_path_meta(redacted_df)
extracted_df = DicomMetaExtractor(catalog).transform(catalog_df)

# modificationTime is stamped with the current time. length is a constant 0
# rather than the real file size, to avoid the cost of computing it.
meta_df = (
    extracted_df
    .withColumn("modificationTime", current_timestamp())
    .withColumn("length", lit(0))
)

# Append the metadata rows for the redacted files through the catalog object.
catalog.save(meta_df, mode="append")