In [4]:
# Mount Google Drive so that files under /content/drive are reachable from this runtime.
# The google.colab package only exists on Colab; in a plain Jupyter kernel the
# mount is skipped instead of aborting the notebook with an ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("Not running in Google Colab - skipping Google Drive mount")
else:
    drive.mount('/content/drive')

Mounted at /content/drive


# 2 Setup

The following installs and imports **[tcia_utils](https://pypi.org/project/tcia-utils/)**, which contains a variety of useful functions for accessing TCIA via Python and Jupyter Notebooks.  It also ensures that the necessary imports are performed and logging settings are adjusted for Google Colab.

In [5]:
import sys

# install tcia utils
!{sys.executable} -m pip install --upgrade -q tcia_utils
!pip install pydicom

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/235.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.7/117.7 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import requests
import pandas as pd
from tcia_utils import nbia

# Set logging level to INFO in Google Colab (not necessary in Jupyter).
if 'google.colab' in sys.modules:
    import logging

    # basicConfig() does nothing when the root logger already has handlers.
    # force=True makes it remove (and close) the existing root handlers first,
    # which replaces the manual removeHandler() loop.
    logging.basicConfig(
        format='%(asctime)s:%(levelname)s:%(message)s',
        level=logging.INFO,
        force=True,
    )

    print("Google Colab Logging set to INFO")

: 

# 4 Download and visualize a sample DICOM RTSTRUCT
RTSTRUCT is another common format used to save segmentations.  Let's take a look at the [Annotations for The Clinical Proteomic Tumor Analysis Consortium Uterine Corpus Endometrial Carcinoma Collection (CPTAC-UCEC-Tumor-Annotations) dataset](https://doi.org/10.7937/89M3-KQ43) as an example.  This [Analysis Result](https://www.cancerimagingarchive.net/tcia-analysis-results/) dataset analyzed images from the [CPTAC-UCEC](https://doi.org/10.7937/K9/TCIA.2018.3R3JUISW) collection.

This time around, let's use the **modality** parameter in getSeries() to only return the RTSTRUCT series.

In [None]:
## To get metadata
# Query TCIA for the RTSTRUCT series in the CPTAC-UCEC collection; format="df"
# asks getSeries() to return the series metadata as a pandas DataFrame.
df = nbia.getSeries(collection="CPTAC-UCEC", modality="RTSTRUCT", format="df")

# Show the result so the series descriptions can be inspected before picking
# a series UID to visualize.
df


If you look at the Series Description column you'll note that in some cases these RTSTRUCT series are listed as "seed point" or "no finding".  Those would not be particularly useful to visualize, so let's make sure to avoid them. You can update the code below to use any other series UID you prefer, but let's start with **1.2.826.0.1.534147.756.812677238.202323194447456**, which has a description of **"Pre-dose UTERUS - 1"**.

In [8]:
# Series Instance UID of the RTSTRUCT (segmentation) series to visualize.
segSeries = "1.2.826.0.1.534147.756.812677238.202323194447456"

Next, let's determine the Reference Series Instance UID of the CT scan that goes with the segmentation.

In [9]:
# Look up the Series Instance UID of the series that the RTSTRUCT in segSeries references.
refSeries = nbia.getSegRefSeries(segSeries)

# Show the resolved UID; the download and viewer cells reuse refSeries.
print(refSeries)

2024-07-30 06:50:34,352:INFO:Success - Token saved to api_call_headers variable and expires at 2024-07-30 08:50:34.352279
2024-07-30 06:50:34,356:INFO:Accessing Advanced API anonymously. To access restricted data use nbia.getToken() with your credentials.
2024-07-30 06:50:34,359:INFO:Calling... https://services.cancerimagingarchive.net/nbia-api/services/getDicomTags with parameters {'SeriesUID': '1.2.826.0.1.534147.756.812677238.202323194447456'}


1.3.6.1.4.1.14519.5.2.1.6450.2626.646366290968362266256965356421


Now let's download these two series.  

In [10]:
# Download the referenced image series and the RTSTRUCT series in one call.
# input_type="list" tells downloadSeries() that a plain list of UIDs is passed;
# with format="df" the call returns the downloaded series' metadata as a
# DataFrame, which is displayed as this cell's output.
nbia.downloadSeries([refSeries, segSeries], input_type= "list", format = "df")

2024-07-30 06:50:34,778:INFO:Downloading 2 Series Instance UIDs (scans).
2024-07-30 06:50:34,783:INFO:Downloading... https://services.cancerimagingarchive.net/nbia-api/services/v1/getImage?NewFileNames=Yes&SeriesInstanceUID=1.3.6.1.4.1.14519.5.2.1.6450.2626.646366290968362266256965356421
2024-07-30 06:53:58,172:INFO:Downloading... https://services.cancerimagingarchive.net/nbia-api/services/v1/getImage?NewFileNames=Yes&SeriesInstanceUID=1.2.826.0.1.534147.756.812677238.202323194447456
2024-07-30 06:53:58,791:INFO:Downloaded 2 out of 2 Series Instance UIDs (scans).
0 failed to download.
0 previously downloaded.


Unnamed: 0,Series UID,Collection,Data Description URI,Subject ID,Study UID,Study Description,Study Date,Series Description,Manufacturer,Modality,...,Annotation Size,Date Released,Series Date,Protocol Name,Annotations Flag,Manufacturer Model Name,Software Versions,TimeStamp,3rd Party Analysis,Body Part Examined
0,1.3.6.1.4.1.14519.5.2.1.6450.2626.646366290968...,CPTAC-UCEC,https://doi.org/10.7937/K9/TCIA.2018.3R3JUISW,C3L-00771,1.3.6.1.4.1.14519.5.2.1.6450.2626.142781207978...,CT ABDOMEN AND PELVIS,06-22-2008,Thins,GE MEDICAL SYSTEMS,CT,...,0,Thu Sep 19 14:51:26 UTC 2019,Sun Jun 22 00:00:00 UTC 2008,6.1 Abdomen Pelvis w/o,False,LightSpeed VCT,gmp_vct.42,Thu Sep 19 14:51:26 UTC 2019,,
1,1.2.826.0.1.534147.756.812677238.202323194447456,CPTAC-UCEC,https://doi.org/10.7937/89M3-KQ43,C3L-00771,1.3.6.1.4.1.14519.5.2.1.6450.2626.142781207978...,CT ABDOMEN AND PELVIS,06-22-2008,Pre-dose UTERUS - 1,Open Health Imaging Foundation,RTSTRUCT,...,0,Mon Jul 24 00:00:00 UTC 2023,Fri Mar 31 00:00:00 UTC 2023,,False,OHIF-XNAT Viewer 3.2.0,gmp_vct.42,Fri Jul 14 19:50:39 UTC 2023,yes,ABDOMEN


Finally, we can look at the images and segmentation together.  You can move the slider to flip through the images and toggle the segmentation layer on/off.

In [11]:
# Open the interactive viewer: the image series (seriesUid) is shown with the
# RTSTRUCT segmentation (annotationUid) as a layer on top of it.
nbia.viewSeriesAnnotation(seriesUid = refSeries, annotationUid = segSeries)



In [48]:
## Read the referenced Series Instance UID straight from the RTSTRUCT DICOM header

from pathlib import Path

from pydicom import dcmread

# Locate the RTSTRUCT file. The hand-placed copy is tried first so existing
# setups behave as before; otherwise use the file saved by downloadSeries().
# NOTE(review): assumes downloadSeries() wrote to its default
# ./tciaDownload/<SeriesInstanceUID>/ folder - confirm if a custom path was used.
rtstruct_candidates = [
    Path('/content/1-1.dcm'),
    Path('tciaDownload') / segSeries / '1-1.dcm',
]
rtstruct_path = next(
    (path for path in rtstruct_candidates if path.exists()),
    rtstruct_candidates[0],  # keep the original path so a missing file fails as before
)

ds = dcmread(str(rtstruct_path))

# Walk the reference chain stored in the RTSTRUCT header, taking the first item
# of each sequence, down to the UID of the image series the contours refer to.
frame_of_reference = ds[0x3006, 0x0010][0]        # Referenced Frame of Reference Sequence
rt_study = frame_of_reference[0x3006, 0x0012][0]  # RT Referenced Study Sequence
rt_series = rt_study[0x3006, 0x0014][0]           # RT Referenced Series Sequence

# (0020,000E) Series Instance UID; in the recorded run it matches refSeries.
ds_ = rt_series[0x0020, 0x000e].value
print(ds_)

1.3.6.1.4.1.14519.5.2.1.6450.2626.646366290968362266256965356421
