<a href="https://colab.research.google.com/github/fedorov/idc-ghc-xfersyntax-testing/blob/main/IDC_J2K_debugging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Related issue: https://github.com/ImagingDataCommons/IDC-ProjectManagement/issues/2044

In [None]:

#@title Enter your Project ID
# initialize this variable with your Google Cloud Project ID!
my_ProjectID = "idc-sandbox-000" #@param {type:"string"}

import os

# Store the project ID in the GCP_PROJECT_ID environment variable.
os.environ["GCP_PROJECT_ID"] = my_ProjectID

# The interactive sign-in helper only exists inside Google Colab. When the
# notebook runs elsewhere (e.g. a local Jupyter kernel) skip it instead of
# failing with ImportError; the curl commands built further down obtain their
# token from `gcloud auth print-access-token`.
try:
    from google.colab import auth
except ImportError:
    print(
        "google.colab is not available; skipping Colab authentication. "
        "Make sure `gcloud auth login` has been run in this environment."
    )
else:
    auth.authenticate_user()

In [None]:
%%capture
!pip install --upgrade idc-index pydicom

In [None]:
# DICOMweb base URLs of the stores under test, keyed by a display label.
# Listed as (label, url) pairs; the resulting dict keeps this order.
DICOMWEB_ENDPOINTS = dict([
    (
        "IDC Proxy",
        "https://proxy.imaging.datacommons.cancer.gov/current/viewer-only-no-downloads-see-tinyurl-dot-com-slash-3j3d9jyp/dicomWeb",
    ),
    (
        "Google Public Store v23",
        "https://healthcare.googleapis.com/v1/projects/nci-idc-data/locations/us-central1/datasets/idc/dicomStores/idc-store-v23/dicomWeb",
    ),
    (
        "Google Public Store v22",
        "https://healthcare.googleapis.com/v1/projects/nci-idc-data/locations/us-central1/datasets/idc/dicomStores/idc-store-v22/dicomWeb",
    ),
    (
        "IDC Internal Store",
        "https://healthcare.googleapis.com/v1/projects/canceridc-data/locations/us/datasets/idc/dicomStores/v23/dicomWeb",
    ),
])

In [None]:
from idc_index import IDCClient

# Notebook-level IDC index client.
client = IDCClient()

# Request the "sm_instance_index" table; presumably this is what makes
# `client.sm_instance_index` available for lookups — TODO confirm against the
# idc-index documentation.
client.fetch_index("sm_instance_index")

In [None]:
# Default directory (relative to the working directory) that downloaded DICOM
# instances are written to; used as the default `downloadDir` of TestSMSample.
DOWNLOADED_INSTANCES_DIR = "./gcs_downloaded"

In [None]:
from pathlib import Path
import pydicom as pd

class TestSMSample:
    """One frame of a slide-microscopy instance used as a DICOMweb test case.

    Holds the study / series / instance UIDs and the frame number that identify
    the frame, and can download the instance locally to inspect the transfer
    syntax it is stored with.

    Args:
        label: Human-readable name of the sample.
        study: StudyInstanceUID.
        series: SeriesInstanceUID.
        instance: SOPInstanceUID.
        frame: Frame number, as it should appear in the request URL.
        downloadDir: Directory the instance is downloaded into by default.
        idc_client: Optional IDCClient to reuse. When omitted, a new client is
            created, so that several samples can share one client instead of
            each building its own and re-fetching the index.
    """

    def __init__(self, label, study, series, instance, frame,
                 downloadDir=DOWNLOADED_INSTANCES_DIR, idc_client=None):
        self.label = label
        self.study = study
        self.series = series
        self.instance = instance
        self.frame = frame

        if idc_client is None:
            idc_client = IDCClient()
        # download_instance() needs the SM instance index; fetch it unless the
        # supplied client already carries it.
        if getattr(idc_client, "sm_instance_index", None) is None:
            idc_client.fetch_index("sm_instance_index")
        self.idc_client = idc_client

        self.downloadDir = downloadDir
        # Set by download_instance() once the file location is known.
        self.instance_path = None

    def __getitem__(self, key):
        """Allow dict-style access to attributes, e.g. ``sample["study"]``."""
        return getattr(self, key)

    def __str__(self):
        """Describe the sample and its locally observed transfer syntax.

        Note: this calls get_local_xfer_syntax(), which downloads the instance
        if it is not available locally yet.
        """
        details = f"TestSMSample(study={self.study}, series={self.series}, instance={self.instance}, frame={self.frame})"
        xfer_syntax = self.get_local_xfer_syntax()
        details += f"\nTransfer Syntax:  {xfer_syntax} {xfer_syntax.name}"
        return details

    def download_instance(self, downloadDir=None):
        """Download the instance and remember where it was written.

        Args:
            downloadDir: Target directory; defaults to ``self.downloadDir``.
                It is created if it does not exist.

        Raises:
            ValueError: If the SOPInstanceUID is not present in the
                ``sm_instance_index`` table of the client.
        """
        if downloadDir is None:
            downloadDir = self.downloadDir

        # Map SOPInstanceUID to crdc_instance_uuid *before* downloading, so an
        # unknown instance fails fast with a clear message instead of an
        # IndexError after the download.
        sm_index = self.idc_client.sm_instance_index
        matches = sm_index[sm_index['SOPInstanceUID'] == self.instance]['crdc_instance_uuid']
        if len(matches) == 0:
            raise ValueError(
                f"SOPInstanceUID {self.instance} was not found in sm_instance_index"
            )

        # create downloadDir if it does not exist
        directory_path = Path(downloadDir)
        directory_path.mkdir(parents=True, exist_ok=True)

        self.idc_client.download_dicom_instance(self.instance, downloadDir=downloadDir, dirTemplate="")

        # Assumes the file is written as "<crdc_instance_uuid>.dcm" directly
        # under downloadDir when dirTemplate is empty — TODO confirm.
        self.instance_path = directory_path / f"{matches.values[0]}.dcm"

    def get_local_xfer_syntax(self, downloadDir=None):
        """Return the TransferSyntaxUID of the locally downloaded instance.

        Downloads the instance first when no local copy is known or the known
        path no longer exists.

        Args:
            downloadDir: Directory to download into if a download is needed;
                defaults to ``self.downloadDir``.
        """
        if downloadDir is None:
            downloadDir = self.downloadDir
        if self.instance_path is None or not self.instance_path.exists():
            self.download_instance(downloadDir)
        # Only the file meta information is needed, so do not parse pixel data.
        dcm = pd.dcmread(self.instance_path, stop_before_pixels=True)
        return dcm.file_meta.TransferSyntaxUID


In [None]:
# Download the failing slide's instance into the local download directory.
# The sample is constructed here because no earlier cell defines
# `failing_slide`; without this the cell raises NameError on a fresh kernel.
failing_slide = TestSMSample(
    "failing slide",
    "2.25.302737996345872783571112300080988167697",
    "1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.2.0",
    "1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.29.0",
    "56",
)
failing_slide.download_instance(DOWNLOADED_INSTANCES_DIR)

2026-01-14 11:53:06,210 - Disk size needed: 4.3 MB
2026-01-14 11:53:06,211 - Disk size available: 2.8 GB
2026-01-14 11:53:06,569 - Not using s5cmd sync as the destination folder is empty or sync or progress bar is not requested
2026-01-14 11:53:06,587 - Initial size of the directory: 0 bytes
2026-01-14 11:53:06,588 - Approximate size of the files that need to be downloaded: 4.3 MB
Downloading data:   0%|          | 0.00/4.30M [00:01<?, ?B/s]
2026-01-14 11:53:08,268 - Successfully downloaded files to /Users/af61/github/idc-ghc-xfersyntax-testing/gcs_downloaded


In [None]:
# Frames under test: one that fails to be retrieved and one that succeeds.
test_samples = [
    TestSMSample(
        label="failing slide",
        study="2.25.302737996345872783571112300080988167697",
        series="1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.2.0",
        instance="1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.29.0",
        frame="56",
    ),
    TestSMSample(
        label="succeeding slide",
        study="2.25.205318147612807799490440393069389220550",
        series="1.3.6.1.4.1.5962.99.1.826406969.1146508888.1727403292729.4.0",
        instance="1.3.6.1.4.1.5962.99.1.826406969.1146508888.1727403292729.38.0",
        frame="64",
    ),
]

In [None]:
# Accept-header variants to test, keyed by a display label. Each value is a
# complete curl argument (the `-H` flag plus the single-quoted header) that
# build_curl_command() splices verbatim into the shell command line.
ACCEPT_HEADERS = {
  # multipart/related with image/jls, image/jp2 and image/jpx for transfer
  # syntaxes 1.2.840.10008.1.2.4.80 through .93, then application/octet-stream
  # with any transfer syntax, and finally */* at q=0.1.
  # NOTE(review): presumably the header sent by the Slim viewer — confirm.
  "slim accept header": "-H 'Accept: multipart/related; type=\"image/jls\"; transfer-syntax=1.2.840.10008.1.2.4.80, multipart/related; type=\"image/jls\"; transfer-syntax=1.2.840.10008.1.2.4.81, multipart/related; type=\"image/jp2\"; transfer-syntax=1.2.840.10008.1.2.4.90, multipart/related; type=\"image/jp2\"; transfer-syntax=1.2.840.10008.1.2.4.91, multipart/related; type=\"image/jpx\"; transfer-syntax=1.2.840.10008.1.2.4.92, multipart/related; type=\"image/jpx\"; transfer-syntax=1.2.840.10008.1.2.4.93, multipart/related; type=\"application/octet-stream\"; transfer-syntax=*, */*;q=0.1'",
  # Requests only image/jp2 with transfer syntax 1.2.840.10008.1.2.4.91.
  ".91 only accept header": "-H 'Accept: multipart/related; type=\"image/jp2\"; transfer-syntax=1.2.840.10008.1.2.4.91'"
}

def build_url(store, slide):
    """Return the DICOMweb frame-retrieval URL for ``slide`` in ``store``.

    Args:
        store: DICOMweb base URL of the store. A trailing slash is tolerated.
        slide: Object with ``study``, ``series``, ``instance`` and ``frame``
            attributes.

    Returns:
        ``<store>/studies/<study>/series/<series>/instances/<instance>/frames/<frame>``
    """
    # Drop trailing slashes so the result never contains "//studies".
    base = str(store).rstrip("/")
    return f"{base}/studies/{slide.study}/series/{slide.series}/instances/{slide.instance}/frames/{slide.frame}"

def build_curl_command(store, slide, accept_headers=""):
    """Build the shell command that fetches one frame with curl.

    Args:
        store: DICOMweb base URL of the store.
        slide: Sample describing the frame; passed through to build_url().
        accept_headers: Ready-made curl argument(s) for the Accept header
            (e.g. ``-H 'Accept: ...'``), inserted verbatim. May be empty.

    Returns:
        A command string meant to be run through a shell: the bearer token is
        obtained by a ``$(gcloud auth print-access-token)`` substitution at
        execution time.
    """
    import shlex  # local import keeps this cell self-contained

    url = build_url(store, slide)
    auth_header = "-H \"Authorization: Bearer $(gcloud auth print-access-token)\""
    # shlex.quote leaves URLs made of shell-safe characters untouched and
    # quotes anything else (e.g. '?' or '&'), so the shell cannot mangle it.
    parts = ["curl -X GET", auth_header, accept_headers, shlex.quote(url)]
    # Skip empty parts so an omitted Accept header leaves no double space.
    return " ".join(part for part in parts if part)

# Preview: the command for the first sample against the public v23 store,
# using the long "slim" Accept header.
curl_command = build_curl_command(
    store=DICOMWEB_ENDPOINTS['Google Public Store v23'],
    slide=test_samples[0],
    accept_headers=ACCEPT_HEADERS['slim accept header'],
)
print("Curl command to download failing slide:", curl_command, sep="\n")

Curl command to download failing slide:
curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H 'Accept: multipart/related; type="image/jls"; transfer-syntax=1.2.840.10008.1.2.4.80, multipart/related; type="image/jls"; transfer-syntax=1.2.840.10008.1.2.4.81, multipart/related; type="image/jp2"; transfer-syntax=1.2.840.10008.1.2.4.90, multipart/related; type="image/jp2"; transfer-syntax=1.2.840.10008.1.2.4.91, multipart/related; type="image/jpx"; transfer-syntax=1.2.840.10008.1.2.4.92, multipart/related; type="image/jpx"; transfer-syntax=1.2.840.10008.1.2.4.93, multipart/related; type="application/octet-stream"; transfer-syntax=*, */*;q=0.1' https://healthcare.googleapis.com/v1/projects/nci-idc-data/locations/us-central1/datasets/idc/dicomStores/idc-store-v23/dicomWeb/studies/2.25.302737996345872783571112300080988167697/series/1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.2.0/instances/1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.29.0/frames/

In [None]:
import subprocess

# For every sample, request its frame from every endpoint with every Accept
# header variant and report what came back.
for slide in test_samples:
    # str(slide) also reports the transfer syntax of the local copy, which
    # downloads the instance if it is not available locally yet.
    print("\n" + str(slide))

    print("\nCurl commands to download slide from various endpoints:")
    for endpoint, store in DICOMWEB_ENDPOINTS.items():
        for accept_header, header_args in ACCEPT_HEADERS.items():
            print(f"\t{endpoint} / {accept_header}:")
            curl_command = build_curl_command(store, slide, header_args)
            print(curl_command)
            # run the command and print out result
            result = subprocess.run(curl_command, shell=True, capture_output=True)
            # A non-zero exit status means curl itself failed (e.g. network or
            # DNS error); surface it instead of showing an empty body.
            if result.returncode != 0:
                stderr_text = result.stderr.decode('utf-8', errors='replace').strip()
                print(f"curl exited with code {result.returncode}: {stderr_text}")
            # try to decode stdout as UTF-8; if it fails, it's binary data (success)
            try:
                stdout_text = result.stdout.decode('utf-8')
                print(f"stdout (text): {stdout_text}")
            except UnicodeDecodeError:
                print(f"stdout: binary data ({len(result.stdout)} bytes)")



TestSMSample(study=2.25.302737996345872783571112300080988167697, series=1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.2.0, instance=1.3.6.1.4.1.5962.99.1.1250863857.1162905243.1637633436401.29.0, frame=56)
Transfer Syntax:  1.2.840.10008.1.2.4.91 JPEG 2000 Image Compression

Curl commands to download slide from various endpoints:
	IDC Proxy / slim accept header:
curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" -H 'Accept: multipart/related; type="image/jls"; transfer-syntax=1.2.840.10008.1.2.4.80, multipart/related; type="image/jls"; transfer-syntax=1.2.840.10008.1.2.4.81, multipart/related; type="image/jp2"; transfer-syntax=1.2.840.10008.1.2.4.90, multipart/related; type="image/jp2"; transfer-syntax=1.2.840.10008.1.2.4.91, multipart/related; type="image/jpx"; transfer-syntax=1.2.840.10008.1.2.4.92, multipart/related; type="image/jpx"; transfer-syntax=1.2.840.10008.1.2.4.93, multipart/related; type="application/octet-stream"; transfer-syntax=*, */*;