In [1]:
!pip install dcm2niix pyorthanc pydicom-seg

Collecting dcm2niix
  Downloading dcm2niix-1.0.20220715.tar.gz (451 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m451.4/451.4 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting pyorthanc
  Obtaining dependency information for pyorthanc from https://files.pythonhosted.org/packages/9c/42/a5512e40e015fea7c19e90ba12f39ae0083bcc94a3c93b225103ae781c34/pyorthanc-1.12.2-py3-none-any.whl.metadata
  Downloading pyorthanc-1.12.2-py3-none-any.whl.metadata (4.9 kB)
Collecting miutil[web] (from dcm2niix)
  Obtaining dependency information for miutil[web] from https://files.pythonhosted.org/packages/8e/75/b4639b18b42d6c8b744b47c3f66c2efd029e7198beed88870c7cb9201f0a/miutil-0.12.0-py3-none-any.whl.metadata
  Downloading miutil-0.12.0-py3-none-any.whl.metadata (4.7 kB)
Collecting httpx

In [2]:
import pydicom
import glob
import pandas as pd
import pydicom_seg

import SimpleITK as sitk
import tempfile
import shutil


from datetime import datetime
from pyorthanc import Orthanc

import os
import base64
import hashlib
import json
import sys
import urllib
import subprocess as sp

from typing import Literal
from httpx import HTTPError


def pad_string_with_dashes(string: str,
                           distance: int = 8):
    """Split ``string`` into groups of ``distance`` characters joined by dashes.

    The last group may be shorter than ``distance``. No trailing dash is
    produced, and an empty string is returned unchanged.

    Args:
        string: Text to split into groups.
        distance: Number of characters per group; must be >= 1.

    Returns:
        The dash-separated string, e.g. ``'abcdefgh'`` with ``distance=4``
        becomes ``'abcd-efgh'``.

    Raises:
        ValueError: If ``distance`` is smaller than 1.
    """
    if distance < 1:
        raise ValueError(f'distance must be a positive integer, got {distance}')

    # Slice by `distance` (previously the slices used a hard-coded 8, so any
    # other distance produced wrongly sized groups).
    return '-'.join(string[start:start + distance]
                    for start in range(0, len(string), distance))


def get_orthan_series_id(patient_id: str, study_instance_uid: str, series_instance_uid: str):
    """Build the dash-separated SHA-1 identifier for a series.

    The identifier is the SHA-1 hex digest of the UTF-8 encoded string
    ``"<patient_id>|<study_instance_uid>|<series_instance_uid>"``, split into
    dash-separated groups of 8 characters.
    """
    digest = hashlib.sha1(
        f"{patient_id}|{study_instance_uid}|{series_instance_uid}".encode('utf-8')
    ).hexdigest()
    return pad_string_with_dashes(digest, distance=8)


def get_orthan_study_id(patient_id: str, study_instance_uid: str):
    """Build the dash-separated SHA-1 identifier for a study.

    The identifier is the SHA-1 hex digest of the UTF-8 encoded string
    ``"<patient_id>|<study_instance_uid>"``, split into dash-separated groups
    of 8 characters.
    """
    digest = hashlib.sha1(
        f"{patient_id}|{study_instance_uid}".encode('utf-8')
    ).hexdigest()
    return pad_string_with_dashes(digest, distance=8)

    
def convert_dicom(target_dir, filename, to_convert, convert_to='nifti_gz', method='dcm2niix', force=False):
    """Convert DICOM data to NRRD or gzipped NIfTI by calling ``dcm2niix``.

    Args:
        target_dir: Output directory (``dcm2niix -o``); created if missing.
        filename: Output file name passed to ``dcm2niix -f``.
        to_convert: Input path handed to ``dcm2niix`` as its last argument.
        convert_to: ``'nrrd'`` or ``'nifti_gz'``.
        method: Conversion backend; only ``'dcm2niix'`` is supported.
        force: Only used for ``'nifti_gz'``: additionally passes ``-m y``.

    Returns:
        ``True`` if ``dcm2niix`` ran and exited successfully, ``False`` if it
        could not be started or exited with a non-zero status (a message is
        printed in both cases).

    Raises:
        NotImplementedError: If ``convert_to`` is not a supported format.
        Exception: If ``method`` is not ``'dcm2niix'``.
    """
    os.makedirs(target_dir, exist_ok=True)

    if convert_to == 'nrrd':
        print('\nConversion from DICOM to NRRD...')
        if method != 'dcm2niix':
            raise Exception('Not recognized {} method to convert from DICOM to NRRD.'.format(method))
        options = ['-e', 'y']
    elif convert_to == 'nifti_gz':
        print('\nConversion from DICOM to NIFTI_GZ...')
        if method != 'dcm2niix':
            raise Exception('Not recognized {} method to convert from DICOM to NIFTI_GZ.'.format(method))
        options = ['-z', 'y', '-p', 'n']
        if force:
            options += ['-m', 'y']
    else:
        raise NotImplementedError('The conversion from DICOM to {} has not been implemented yet.'
                                  .format(convert_to))

    # Pass an argument list and do NOT go through a shell: paths containing
    # spaces or shell metacharacters are then handed to dcm2niix verbatim
    # instead of being word-split or executed.
    cmd = ['dcm2niix', '-o', str(target_dir), '-f', str(filename), *options, str(to_convert)]
    try:
        sp.check_output(cmd)
    except (sp.CalledProcessError, OSError):
        # CalledProcessError: dcm2niix exited with a non-zero status.
        # OSError: the dcm2niix executable could not be started (e.g. not installed).
        # Deliberately best-effort: the caller keeps going with the next scan.
        print('Conversion failed. Scan will be ignored.')
        return False

    print('\nImage successfully converted!')
    return True

In [2]:
# Collect every `seg.dcm` that sits one directory level below the
# ProcessingState.PROCESSED folder of the backup.
# NOTE(review): glob.glob() returns paths in arbitrary order, so positional access such
# as segmentations[0] is not guaranteed to be stable across runs — sort if order matters.
# NOTE(review): absolute, user-specific path; consider a configurable data directory.
segmentations = glob.glob("/data/oleksii/alta-ai-orthanc-backup/2023_08_15_full/prostate_zone/ProcessingState.PROCESSED/*/seg.dcm")
len(segmentations)

1210

In [8]:
# Remote client (disabled). Configure it from the environment as well:
# orthanc_remote = Orthanc(os.environ.get('ORTHANC_REMOTE_URL', 'http://localhost:52052'))
# orthanc_remote.setup_credentials(os.environ['ORTHANC_REMOTE_USER'], os.environ['ORTHANC_REMOTE_PASSWORD'])  # If needed

# Initialize orthanc client.
# Credentials are read from environment variables so that no secret is stored in
# the notebook file. A missing variable fails fast with a KeyError.
# NOTE: passwords that were previously embedded in this notebook must be treated
# as compromised and rotated.
orthanc = Orthanc(os.environ.get('ORTHANC_URL', 'http://localhost:8042'))
orthanc.setup_credentials(os.environ['ORTHANC_USER'], os.environ['ORTHANC_PASSWORD'])


In [25]:
# Function that retrieves the referenced series of the segmentation file
def get_referenced_series(orthanc_client, dataset):
    """Download all instances of the series referenced by a segmentation dataset.

    Args:
        orthanc_client: Client object providing ``get_series_id(series_id)`` and
            ``get_instances_id_file(instance_id)``.
        dataset: The segmentation dataset; it is indexed by DICOM tag.

    Returns:
        A tuple ``(series_identifier, referenced_instances, files)`` where
        ``series_identifier`` is the identifier computed by
        ``get_orthan_series_id``, ``referenced_instances`` is the ``'Instances'``
        entry of the series information and ``files`` holds the result of
        ``get_instances_id_file`` for each of them, in the same order.
        ``(None, None, None)`` is returned when the required tags are missing
        or the server request fails.
    """
    try:
        patient_id = dataset[(0x0010, 0x0020)].value  # PatientID
        # (0020,000D) is the *Study* Instance UID of the segmentation.
        study_instance_uid = dataset[(0x0020, 0x000d)].value
        # First item of the Referenced Series Sequence -> its Series Instance UID.
        ref_series_instance_uid = dataset[(0x0008, 0x1115)][0][(0x0020, 0x000e)].value
    except (KeyError, IndexError):
        # Without these tags the referenced series cannot be identified; report it
        # like any other retrieval failure instead of aborting the caller's batch.
        print("could not determine the referenced series: required DICOM tags are missing")
        return None, None, None

    series_identifier = get_orthan_series_id(patient_id, study_instance_uid, ref_series_instance_uid)

    try:
        series_info = orthanc_client.get_series_id(series_identifier)
        referenced_instances = series_info['Instances']
        files = [orthanc_client.get_instances_id_file(instance_id) for instance_id in referenced_instances]
    except HTTPError:
        print(f"could not retrieve the referenced dicoms {series_identifier}")
        return None, None, None

    print(f"Retrieved {len(files)} istances..")
    return series_identifier, referenced_instances, files



In [27]:
# Root output directory; one sub-directory per referenced series id is created below.
# NOTE(review): absolute, user-specific path; consider a configurable data directory.
target_dir = "/data/oleksii/Prostate-ZONE-Datasets-NRRDS/ALTA-Zone-Dataset-new/"

# Iterate over available segmentations 
# NOTE(review): meta_info is never filled or read in the visible code.
meta_info = []
for i, seg_path in enumerate(segmentations[:]):
    # Name of the directory that contains this seg.dcm.
    orthancID = seg_path.split("/")[-2]
    dataset = pydicom.dcmread(seg_path)
    print(f"processing {i}/{len(segmentations)} {orthancID}...")
    
    # Fetch the instances of the series this segmentation refers to;
    # skip the case when they could not be retrieved.
    series_id, instances_ids, instances = get_referenced_series(orthanc, dataset)
    if series_id is None:
        continue 
    
    # Dump the downloaded instances into a throw-away directory (one file per
    # instance, named after its instance id) and convert that directory to NRRD.
    with tempfile.TemporaryDirectory() as tmpdirname:
        print('created temporary directory', tmpdirname)
        for instance_bytes, ids in zip(instances, instances_ids):
            with open(os.path.join(tmpdirname, ids), 'wb') as f: 
                f.write(instance_bytes)
    
        target_dir_tmp = os.path.join(target_dir, series_id)
        os.makedirs(target_dir_tmp, exist_ok=True)
        # NOTE(review): convert_dicom does not raise when the dcm2niix call fails (it only
        # prints), so the steps below still run for a case whose image conversion failed.
        convert_dicom(target_dir=target_dir_tmp, filename="tra_t2w", to_convert=tmpdirname, convert_to="nrrd")
    # Keep a copy of the original segmentation file next to the converted image.
    shutil.copyfile(seg_path, os.path.join(target_dir_tmp, os.path.basename(seg_path)))
    
    # Decode the segmentation with pydicom_seg and write it out as an NRRD label map.
    reader = pydicom_seg.MultiClassReader()
    result = reader.read(dataset)

    # NOTE(review): image_data is not used afterwards in the visible code.
    image_data = result.data  # directly available
    image = result.image  # lazy construction
    # Third positional argument presumably enables compression (SimpleITK useCompression) — confirm.
    sitk.WriteImage(image, os.path.join(target_dir_tmp, 'Segmentation-label.seg.nrrd'), True)
        
    

processing 0/1210 c105015c-0b8b4cbc-a17b045b-0538fce3-ea0e02de...
Retrieved 19 istances..
created temporary directory /tmp/tmpgyx25nct

Conversion from DICOM to NRRD...

Image successfully converted!
processing 1/1210 93d10d3e-adf2d3e5-651b9fc2-01dcd70a-06618158...
Retrieved 16 istances..
created temporary directory /tmp/tmpkk0g5c00

Conversion from DICOM to NRRD...

Image successfully converted!
processing 2/1210 66400873-4d62f825-a061b9c2-3b65ec4d-d53e449b...
Retrieved 15 istances..
created temporary directory /tmp/tmpa_v6qirb

Conversion from DICOM to NRRD...

Image successfully converted!
processing 3/1210 e3bac28b-28235cb8-ff74f2b0-80861c06-5fd68257...
Retrieved 21 istances..
created temporary directory /tmp/tmpw16vqz9l

Conversion from DICOM to NRRD...

Image successfully converted!
processing 4/1210 ed0d6512-fa250153-7b59a9e6-3fd5250b-fe4040aa...
Retrieved 15 istances..
created temporary directory /tmp/tmp3zt3j8gw

Conversion from DICOM to NRRD...

Image successfully converted!


In [None]:
# could not retrieve the referenced dicoms 9b806df7-7975477f-88409944-1b9c06c6-36da088d
# could not retrieve the referenced dicoms c2408e5b-8040a604-445b28e8-06e5e441-25c545ef
# could not retrieve the referenced dicoms 7038dd58-7d88e061-e31882d2-1ee10535-dd2a1783


In [19]:
# Inspect the segment items of the first segmentation file only.
seg = segmentations[0]
segment_sequence = (0x0062, 0x0002)
dataset = pydicom.dcmread(seg)
for i, segment in enumerate(dataset[segment_sequence]):
    # Print the whole segment item, then its index together with
    # element (0062,0006) (shown as "Segment Description" in the output).
    print(segment)
    print(i, segment[(0x0062, 0x0006)])

(0062, 0003)  Segmented Property Category Code Sequence  1 item(s) ---- 
   (0008, 0100) Code Value                          SH: '123037004'
   (0008, 0102) Coding Scheme Designator            SH: 'SCT'
   (0008, 0104) Code Meaning                        LO: 'Anatomical Structure'
   ---------
(0062, 0004) Segment Number                      US: 1
(0062, 0005) Segment Label                       LO: 'Organ'
(0062, 0006) Segment Description                 ST: '{"lastEditor": "02b5a45a-dae4-4c71-835b-ba41c05b3e69", "volume": 15.0, "PredictionQuality": 1, "Nodes": null, "ZoneMeasures": [null, null, null]}'
(0062, 0007)  Segmentation Algorithm Identification Sequence  1 item(s) ---- 
   (0066, 002f)  Algorithm Family Code Sequence  1 item(s) ---- 
      (0008, 0100) Code Value                          SH: '123109'
      (0008, 0102) Coding Scheme Designator            SH: 'DCM'
      (0008, 0104) Code Meaning                        LO: 'Manual Processing'
      ---------
   (0066, 0031) A

In [12]:
# Convert the directory of every entry in `zone_segms` to NRRD. The output goes to the
# same path with "prostate_zone" replaced by "prostate_zone_converted".
# NOTE(review): `zone_segms` and `convert_dicom_seg` are not defined anywhere in the visible
# part of this notebook — this cell presumably relies on leftover kernel state; confirm
# and define them (or use `segmentations` / `convert_dicom`) so Restart & Run All works.
# NOTE(review): this loop overwrites the global `target_dir` used by the export cell.
for zone in zone_segms:
    to_convert = os.path.dirname(zone)
    target_dir = to_convert.replace("prostate_zone", "prostate_zone_converted")
    convert_dicom_seg(target_dir, filename="seg", to_convert=to_convert, convert_to="nrrd")


Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to NRRD...

Image successfully converted!

Conversion from DICOM to

In [78]:

def get_lines(path):
    """Return the lines of the text file at ``path``, each stripped of
    leading and trailing whitespace (including the line break)."""
    with open(path, "r") as handle:
        return [raw_line.strip() for raw_line in handle]

def write_list(list_to_write, path):
    """Write the items of ``list_to_write`` to ``path``, one per line.

    Each item is converted with ``str()``. The file is overwritten if it
    exists. ``Done`` is printed once all items have been written.

    Args:
        list_to_write: Iterable of items to write.
        path: Destination file path.
    """
    with open(path, 'w') as fp:
        # `"%s\n" % item` treats a tuple item as the argument list of the format
        # string (TypeError, or silently unpacked for 1-tuples); format the item
        # explicitly so every item type is written via str().
        fp.writelines(f"{item!s}\n" for item in list_to_write)
    print('Done')

In [7]:
# dataset

### Fetching studies from alta-ai

In [3]:
# Client for the remote Orthanc instance.
# NOTE(review): the default URL points at localhost, so the remote server presumably
# has to be port-mapped/tunnelled to this machine first — confirm.
# Credentials are read from environment variables so that no secret is stored in
# the notebook file. A missing variable fails fast with a KeyError.
# NOTE: the password previously embedded here must be treated as compromised and rotated.
orthanc_remote = Orthanc(os.environ.get('ORTHANC_REMOTE_URL', 'http://localhost:52052'))
orthanc_remote.setup_credentials(os.environ['ORTHANC_REMOTE_USER'], os.environ['ORTHANC_REMOTE_PASSWORD'])  # If needed


In [4]:
# Number of entries returned by the remote server's study listing.
len(orthanc_remote.get_studies())

1043

In [6]:
# Load the ';'-separated upload batch table and add a `study_identifier` column computed
# by get_orthan_study_id from each row's PatientID and StudyInstanceUID.
path = "upload_batch_20230727.csv"
dicom_meta_data = pd.read_csv(path, sep=";")
dicom_meta_data['study_identifier'] = dicom_meta_data.apply(lambda x: get_orthan_study_id(x.PatientID, x.StudyInstanceUID), axis=1)

# apply filter
# (disabled: would keep only rows whose StudyDate is set and starts with "2023")
# dicom_meta_data = dicom_meta_data[~dicom_meta_data["StudyDate"].isna()]
# dicom_meta_data["StudyDate"] = dicom_meta_data["StudyDate"].astype(int).astype(str)
# dicom_meta_data = dicom_meta_data[dicom_meta_data["StudyDate"].str.startswith("2023")]
# Note: study_ids holds the DICOM StudyInstanceUIDs, not the computed study_identifier values.
study_ids = dicom_meta_data["StudyInstanceUID"].tolist()
print(f"to upload {len(study_ids)} cases:")

to upload 3638 cases:


In [8]:

# Persist the metadata table as a ';'-separated CSV.
# NOTE(review): absolute, user-specific path; consider a configurable output directory.
dicom_meta_data.to_csv("/home/oleksii/projects/alta-backend/export_befundtexte.csv", sep=";")

In [64]:
# Pick one case from the metadata table for a manual lookup.
# NOTE(review): `series[idx]` is a label-based lookup; it presumably selects the first
# row only while the frame still has its default integer index — confirm.
idx = 0
patient_id = dicom_meta_data["PatientID"][idx]
study_instance_uid = dicom_meta_data["StudyInstanceUID"][idx]

In [73]:
# Read two text files (one entry per line) into lists of stripped lines.
# NOTE(review): `path` reuses the name that earlier held the CSV file name.
# NOTE(review): presumably these files contain Orthanc study identifiers — confirm.
path = "/home/oleksii/projects/alta-backend/unassigned.txt"
path_all = "/home/oleksii/projects/alta-backend/all_studies.txt"

unassigned = get_lines(path)
remote_studies = get_lines(path_all)

In [159]:
# Number of lines read from all_studies.txt.
len(remote_studies)

3375

In [50]:
# Computed study identifiers of the first 500 rows of the metadata table.
local_studies = dicom_meta_data['study_identifier'].to_list()[:500]

In [74]:
# Identifiers that occur both in local_studies and in remote_studies.
# The set round-trip removes duplicates and does not preserve order.
intersection_studies = list(set(local_studies) & set(remote_studies))

In [75]:
# Identifiers that occur both in local_studies and in the `unassigned` list.
# NOTE(review): this reassigns `intersection_studies`, discarding the intersection with
# `remote_studies` computed in the previous cell — confirm which one is intended.
intersection_studies = list(set(local_studies) & set(unassigned))

In [90]:
# Look up the StudyInstanceUID of every selected study on the remote server
# (one request per study).
intersection_studyiuid = [
    orthanc_remote.get_studies_id(orthanc_study_id)["MainDicomTags"]["StudyInstanceUID"]
    for orthanc_study_id in intersection_studies
]

In [94]:
# Build one viewer link per StudyInstanceUID and write them to a text file, one per line.
direktlinks = [f"https://alta-ai.com/viewer/{i}" for i in intersection_studyiuid]
write_list(direktlinks, "export_niklas_20230815.txt")

Done


In [104]:
# Keep only the metadata rows whose StudyInstanceUID is among the selected studies
# and export them as a ';'-separated CSV.
# NOTE(review): absolute, user-specific output path.
befundtexte = dicom_meta_data[dicom_meta_data["StudyInstanceUID"].isin(intersection_studyiuid)]


befundtexte.to_csv("/home/oleksii/projects/alta-backend/export_niklas_20230815_befundtexte.csv", sep=";")

In [91]:
# Write the selected StudyInstanceUIDs to a text file, one per line.
write_list(intersection_studyiuid, "/home/oleksii/projects/alta-backend/export_niklas.txt")

Done


In [36]:
# Compute the study identifier for the manually selected patient/study pair.
study_identifier = get_orthan_study_id(patient_id, study_instance_uid)

In [80]:
# Fetch the remote server's record for that study identifier.
study_info = orthanc_remote.get_studies_id(study_identifier)
