# Overview
Index the compressed file(s) with Pixels (`dbx.pixels`) in preparation for display.

## Requirements
See `./requirements.txt`.

Compute: Serverless CPU Notebooks

In [0]:
# NOTE(review): these imports run before the Python restart at the end of this
# cell, so they presumably do not carry over to later cells (a later cell
# imports `os` again). `dbx` is not in the %pip list below, so it is assumed to
# be provided by the environment already — TODO confirm.
import os
import dbx

# Install the DICOM / JPEG decoding dependencies for this notebook.
# NOTE(review): only numpy and pydicom are pinned; the pylibjpeg packages and
# fsspec float to whatever version is current at install time.
%pip install \
    numpy==1.26.4 \
    pydicom==3.0.1 \
    pylibjpeg \
    pylibjpeg-libjpeg \
    pylibjpeg-openjpeg \
    fsspec


# Restart the Python process after the install so that later cells import the
# freshly installed packages.
dbutils.library.restartPython()

In [0]:
import yaml
import pprint

# Load the notebook settings from config.yaml. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open for the
# lifetime of the kernel.
with open("config.yaml", "r") as config_file:
    cfg = yaml.safe_load(config_file)

# dict.get returns None for any key that is absent from the config, so a
# missing setting surfaces later, where the value is first used.
input_path = cfg.get("input_path")
output_path = cfg.get("output_path")
table = cfg.get("table")
volume = cfg.get("volume")
write_mode = cfg.get("write_mode")

# Echo the effective configuration so the run is self-documenting.
pprint.pprint(cfg, indent=4)
print(output_path)

In [0]:
import os
# Set before pydicom / pylibjpeg are imported below; presumably read when those
# packages load to enable the OpenJPEG plugin — TODO confirm against the
# pylibjpeg documentation.
os.environ["PYLIBJPEG_OPENJPEG"] = "1"
import pydicom
# Imported for its side effects only: the name is not referenced by any visible
# cell. Presumably it registers the JPEG decoders used by pydicom — verify
# before removing.
import pylibjpeg # very important import

In [0]:
# Start from a clean slate: drop the configured table if it already exists.
# Each part of the dotted name (catalog.schema.table) is back-quoted on its
# own. Wrapping the whole dotted string in a single pair of backticks makes
# Spark treat it as ONE identifier containing dots, so the statement would
# silently match nothing and leave the real table in place.
quoted_table = ".".join(f"`{part.strip('`')}`" for part in table.split("."))
spark.sql(f"DROP TABLE IF EXISTS {quoted_table}")

In [0]:
def escape_table(table: str) -> str:
    """Back-quote every part of a three-part ``catalog.schema.table`` name.

    Args:
        table: Dotted table name with exactly three parts. Parts may already
            be wrapped in backticks; the wrapping is normalised rather than
            doubled up.

    Returns:
        The name with each part individually back-quoted, e.g.
        ``a.b.c`` becomes `` `a`.`b`.`c` ``.

    Raises:
        ValueError: If ``table`` does not consist of exactly three non-empty
            dot-separated parts.
    """
    # Drop any backticks the caller already added so they are not quoted twice.
    parts = [part.strip('`') for part in table.split('.')]
    if len(parts) != 3 or not all(parts):
        raise ValueError(
            f"expected a three-part 'catalog.schema.table' name, got {table!r}"
        )
    # A literal backtick inside a quoted identifier is escaped by doubling it.
    return ".".join("`" + part.replace("`", "``") + "`" for part in parts)

# Show the escaped form of the configured table name as this cell's output.
escape_table(table)

In [0]:
from dbx.pixels import Catalog
from dbx.pixels.dicom import DicomMetaExtractor  # DICOM metadata transformer

# Index only when the output location exists; otherwise this cell does nothing.
# `os` is expected to be in scope from an earlier cell.
if os.path.exists(output_path):
    # Catalog bound to the configured table and volume.
    catalog = Catalog(spark, table=escape_table(table), volume=volume)

    # Build the catalog of objects found under output_path.
    catalog_df = catalog.catalog(path=output_path)

    # Run the DICOM metadata extractor (deep=False) over the catalog.
    extractor = DicomMetaExtractor(catalog, deep=False)
    meta_df = extractor.transform(catalog_df)

    # Persist the result using the write mode taken from the config.
    catalog.save(meta_df, mode=write_mode)


In [0]:
# Read the configured table back and render it in the notebook.
indexed_table_df = spark.table(escape_table(table))
display(indexed_table_df)

In [0]:
# %sql truncate table `hls_radiology`.`tcia`.`object_catalog_htj2k_v8`;

In [0]:
# Count the rows in the table this notebook actually wrote. The table name comes
# from the config (via escape_table) rather than being hard-coded, so the count
# stays in step with the other cells when config.yaml points somewhere else.
display(spark.sql(f"select count(1) from {escape_table(table)}"))