# Lab 3


In [None]:
"""
Created by:  Charity Grey (2025)
Modified by:  [Your Name] (2025)
"""


# Install required packages if needed
# !pip install SimpleITK
from tcia_utils import nbia
import pandas as pd
import SimpleITK as sitk
import numpy as np
import os
import matplotlib.pyplot as plt

### DICOM

DICOM (Digital Imaging and Communications in Medicine) is the international standard for medical images and related information.
- OPTIONAL 10 min video, just for your knowledge: https://www.youtube.com/watch?v=-c9KhcGYTG4 

### Visit The Cancer Imaging Archive (TCIA)
Skim this: https://www.cancerimagingarchive.net/browse-collections/ 

#### Download the data using their web API

Q: What is an API? Watch this 3 min video to understand:
https://www.youtube.com/watch?v=-0MmWEYR2a8 

Ans: An application programming interface is a connection between computers or between computer programs. It is a type of software interface, offering a service to other pieces of software


#### How to use APIs? 

- In bioinformatics, a lot of APIs are used for downloading and accessing data
- Each API is different, but you always make some sort of request. For example, in English, asking "Where?" signals that the answer you want is a location.

How to format your request can be learned by reading the documentation for that API.

Our reference documentation here:
- https://wiki.cancerimagingarchive.net/display/Public/TCIA+Programmatic+Interface+%28REST+API%29+Usage+Guide+v2 
- if you would like to follow their optional tutorial: https://github.com/kirbyju/TCIA_Notebooks/blob/main/TCGA/TCGA_Clinical.ipynb 


Fortunately for us, the TCIA also has a Python library whose functions access these APIs directly, so we will be using that.

### YOU ARE NOT EXPECTED TO REPRODUCE THIS. JUST READ IT AND FOLLOW ALONG AS YOU RUN THE CELLS

The reason is that bioinformatics workflows are NOT just about analysis. They are also about getting the data and cleaning/manipulating it into the right format for analysis
- this is called data engineering (or data wrangling, if you come from data science)

In [None]:

# Ask the TCIA API for the list of every image collection it exposes.
collections_json = nbia.getCollections()
print(f"{len(collections_json)} collections were found.")
collections = [entry['Collection'] for entry in collections_json]
print('Collections: ', collections)

# From here on we only work with a single collection, so replace the full list.
collections = ['PROSTATE-DIAGNOSIS']

# What is an nbia series?
# Ans: it's just a description of the datasets available
series_df = nbia.getSeries(collections[0], format="df")

print("Series DataFrame shape:", series_df.shape)
series_df.head(1)

In [None]:
try:
    # Take the first row of the series table: its UID and the body part it covers.
    series_uid = series_df['SeriesInstanceUID'].iloc[0]
    bodyPart = series_df['BodyPartExamined'].iloc[0]
    print(f"\nAttempting to download series for body part - {bodyPart}: {series_uid}")

    # Download that one series, passing the UID as a one-element list.
    # TODO: Specify download directory, rename this env to not interfere with other people's work
    nbia.downloadSeries(
        [series_uid],
        input_type="list",
        number=1,
        path="./downloads",
    )
    print("Download completed successfully!")

except Exception as e:
    # Report the problem instead of stopping the notebook.
    print(f"Download failed: {e}")

## 1. Loading Images with SimpleITK

In [None]:
# Method 1: Read a single DICOM file
try:
    # Load a single DICOM file from the folder the series was downloaded into.
    folderpath = "./downloads/1.3.6.1.4.1.14519.5.2.1.4792.2002.115178120278427420002997506613/" #TODO replace with path of where you downloaded to
    image_path = f'{folderpath}1-01.dcm'  # Update with your image path
    image = sitk.ReadImage(image_path)
    print('Single DICOM Image size:', image.GetSize())
    print('Image spacing:', image.GetSpacing())
except Exception as e:
    # Catch Exception rather than using a bare `except:` so that Ctrl-C still
    # interrupts the cell, and print the actual cause: the failure is not
    # necessarily a missing file.
    print(f"Could not read single DICOM file: {e}")


In [None]:
# The following line will fail. Why?
# Hint: look up in the SimpleITK documentation what sitk.Show relies on to display an image.
# Ans: TODO

sitk.Show(image, title="image 1")

In [None]:
# Convert the SimpleITK image into a NumPy array.
nda = sitk.GetArrayFromImage(image)
# The following line will also fail. Why?
# Hint: print nda.shape and compare it with the array shapes plt.imshow accepts.
# Ans: TODO

plt.imshow(nda)

In [None]:
# Try this instead: remove the length-1 axes first, then draw in grayscale.
squeezed_nda = np.squeeze(nda)
plt.imshow(squeezed_nda, cmap='gray')
plt.title('Original DICOM Image')
plt.axis('off')
plt.show()

## 2. Image Binarization and Selective Coloring

Now let's work with binarization, masking, and selective coloring techniques.

Let's go over a quick example of image binarization, a.k.a. image thresholding (one of the traditional image segmentation techniques).

We are going over this because it's the easiest example and also because I want you to get familiar with how `skimage` does certain stuff

Please make sure `scikit-image` is installed before running the cell below, if you haven't already installed it from `requirements.txt`.

In [None]:
import cv2
from skimage import filters, morphology
from skimage.color import rgb2hsv, hsv2rgb

In [None]:
# Drop length-1 axes and report the basic properties of the pixel array.
img_array = np.squeeze(nda)
print(f"Shape: {img_array.shape}")  # was mislabeled "Dtype:", giving two "Dtype:" lines
print(f"Dtype: {img_array.dtype}")
print(f"Min value: {np.min(img_array)}")
print(f"Max value: {np.max(img_array)}")
# Q: What does the shape tell us about the color of the image
# Ans: TODO

In [None]:
# Even though the image has a single channel (so no RGB), its maximum pixel value is far above 255.
# Rescale the values into the range [0, 255] with min-max normalization, then cast the result to
# uint8 so the image is compatible with the functions used in the following cells.

image_normalized = cv2.normalize(
    img_array,
    None,
    alpha=0,
    beta=255,
    norm_type=cv2.NORM_MINMAX,
)
image_uint8 = image_normalized.astype(np.uint8)

In [None]:
def create_mask(image):
    """Binarize a grayscale image using Otsu's automatic threshold.

    Args:
        image: Single-channel image array to threshold. In this notebook the
            caller passes the uint8 image produced by the normalization step.

    Returns:
        The thresholded image returned by ``cv2.threshold``: pixels above the
        Otsu-selected threshold are set to 255, all others to 0.
    """
    # cv2.threshold(image, thresh_value, max_value, threshold_method) returns
    # (threshold_used, thresholded_image). With THRESH_OTSU the threshold is
    # computed from the image, so the 0 passed as thresh_value is a placeholder.
    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary

# Build the binary mask from the normalized uint8 image.
binary_mask = create_mask(image_uint8)

alpha = 0.6  # weight of the original image in the blend
beta = 0.4   # weight of the mask overlay in the blend
# Blend the image with its mask: image_uint8 * alpha + binary_mask * beta + 0.
overlayed = cv2.addWeighted(image_uint8, alpha, binary_mask.astype(np.uint8), beta, 0)

In [None]:
# Plotting

# Create one wide figure to hold the plots below.
plt.figure(figsize=(20, 5))

# Ans: TODO
# plot img_array
# plot binary_mask
# plot overlayed

# Note: pass cmap='gray' to plt.imshow for these images; for a single-channel (2D) array
# imshow otherwise applies its default colormap instead of showing grayscale.