## DICOM Image Preprocessing

### All Jitter

In [None]:
# Jitter: contains translation, rotation, and horizontal flip.
# We will be testing translation only, rotation only, flip only, and all jitter
#       - For translation only: run the block with output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/translation_only" and comment out other preprocessing techniques
#       - For rotation only: run the block with output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/rotation_only" and comment out other preprocessing techniques
#       - For flip only: run the block with output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/flip_only" and comment out other preprocessing techniques
#       - For all jitter: run the block with output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/all_jitter" 

import pydicom
import numpy as np
import random
import os
import cv2
from collections import defaultdict

# All-jitter preprocessing: for every DICOM slice of every PANCREAS* patient
# folder, apply a random translation, then a random rotation, then (with
# probability 0.5) a horizontal flip, and save the result under
# output_root_path/<patient folder>/<original file name>.
root_dir_string = "/Users/lukeyun/SDS323/manifest-1599750808610/Pancreas-CT" # Change as necessary
root_dir_list = sorted(os.listdir(root_dir_string))
data = defaultdict(list)
output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/all_jitter"

# Jitter limits never change during the run, so they are defined once here
# instead of being reassigned for every image.
max_translate = 10       # largest shift, in pixels, along each axis
max_rotation_angle = 10  # largest rotation, in degrees, in either direction

# Go through the entries in the root directory
for folder in root_dir_list:

    # Skip non-data entries
    if not folder.startswith("PANCREAS"):
        continue

    # Find data files.
    # Assumes every patient folder is organized the same way, so that the
    # third entry produced by os.walk is the directory holding the slices.
    # The tree is walked once and both the path and the file list are taken
    # from that single result.
    walk_entries = list(os.walk(os.path.join(root_dir_string, folder)))
    data_dir_string = walk_entries[2][0]
    dcm_files = sorted(walk_entries[2][2])

    # Create a folder under the jitter output root with the same name as the
    # patient folder being processed
    output_folder_path = os.path.join(output_root_path, folder)
    os.makedirs(output_folder_path, exist_ok=True)

    # Read the data
    for image_file in dcm_files:

        cur_data = pydicom.dcmread(os.path.join(data_dir_string, image_file))
        cur_data_pixels = cur_data.pixel_array # extract only the picture

        # cv2 expects the output size as (width, height)
        output_size = (cur_data_pixels.shape[1], cur_data_pixels.shape[0])

        # Apply random translation
        dx = random.randint(-max_translate, max_translate)
        dy = random.randint(-max_translate, max_translate)
        translation_matrix = np.float32([[1, 0, dx], [0, 1, dy]])
        translated_image = cv2.warpAffine(cur_data_pixels, translation_matrix, output_size)

        # Apply random rotation about the image centre (scale factor 1)
        rotation_angle = random.uniform(-max_rotation_angle, max_rotation_angle)
        rotation_matrix = cv2.getRotationMatrix2D((cur_data_pixels.shape[1] / 2, cur_data_pixels.shape[0] / 2), rotation_angle, 1)
        rotated_image = cv2.warpAffine(translated_image, rotation_matrix, output_size)

        # Apply random horizontal flip
        if random.random() < 0.5:
            flipped_image = np.flip(rotated_image, axis=1)  # Horizontal flip
        else:
            flipped_image = rotated_image

        # Save the jittered image as a new DICOM file.
        # NOTE(review): writing raw bytes into PixelData presumes the source
        # files use an uncompressed transfer syntax - confirm for this dataset.
        jittered_ds = cur_data.copy()
        jittered_ds.PixelData = flipped_image.tobytes()
        jittered_ds.save_as(os.path.join(output_folder_path, image_file))

        print(f"Preprocessing completed for {image_file}")

### Translation only

In [None]:
# translation only preprocessing

import pydicom
import numpy as np
import random
import os
import cv2
from collections import defaultdict

# Translation-only preprocessing: shift every DICOM slice of every PANCREAS*
# patient folder by a random integer offset and save the result under
# output_root_path/<patient folder>/<original file name>.
root_dir_string = "/Users/lukeyun/SDS323/manifest-1599750808610/Pancreas-CT" # Change as necessary
root_dir_list = sorted(os.listdir(root_dir_string))
data = defaultdict(list)
output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/translation_only"

# The translation limit never changes during the run, so it is defined once
# here instead of being reassigned for every image.
max_translate = 10  # largest shift, in pixels, along each axis

# Go through the entries in the root directory
for folder in root_dir_list:

    # Skip non-data entries
    if not folder.startswith("PANCREAS"):
        continue

    # Find data files.
    # Assumes every patient folder is organized the same way, so that the
    # third entry produced by os.walk is the directory holding the slices.
    # The tree is walked once and both the path and the file list are taken
    # from that single result.
    walk_entries = list(os.walk(os.path.join(root_dir_string, folder)))
    data_dir_string = walk_entries[2][0]
    dcm_files = sorted(walk_entries[2][2])

    # Create a folder under the translation output root with the same name as
    # the patient folder being processed
    output_folder_path = os.path.join(output_root_path, folder)
    os.makedirs(output_folder_path, exist_ok=True)

    # Read the data
    for image_file in dcm_files:

        cur_data = pydicom.dcmread(os.path.join(data_dir_string, image_file))
        cur_data_pixels = cur_data.pixel_array # extract only the picture

        # Apply random translation; cv2 expects the output size as (width, height)
        dx = random.randint(-max_translate, max_translate)
        dy = random.randint(-max_translate, max_translate)
        translation_matrix = np.float32([[1, 0, dx], [0, 1, dy]])
        translated_image = cv2.warpAffine(cur_data_pixels, translation_matrix, (cur_data_pixels.shape[1], cur_data_pixels.shape[0]))

        # Save the translated image as a new DICOM file.
        # NOTE(review): writing raw bytes into PixelData presumes the source
        # files use an uncompressed transfer syntax - confirm for this dataset.
        translated_ds = cur_data.copy()
        translated_ds.PixelData = translated_image.tobytes()
        translated_ds.save_as(os.path.join(output_folder_path, image_file))

        print(f"Preprocessing completed for {image_file}")

### Rotation Only

In [None]:
import pydicom
import numpy as np
import random
import os
import cv2
from collections import defaultdict

# Rotation-only preprocessing: rotate every DICOM slice of every PANCREAS*
# patient folder by a random angle about its centre and save the result under
# output_root_path/<patient folder>/<original file name>.
root_dir_string = "/Users/lukeyun/SDS323/manifest-1599750808610/Pancreas-CT" # Change as necessary
root_dir_list = sorted(os.listdir(root_dir_string))
data = defaultdict(list)
output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/rotation_only"

# The rotation limit never changes during the run, so it is defined once here
# instead of being reassigned for every image.
max_rotation_angle = 10  # largest rotation, in degrees, in either direction

# Go through the entries in the root directory
for folder in root_dir_list:

    # Skip non-data entries
    if not folder.startswith("PANCREAS"):
        continue

    # Find data files.
    # Assumes every patient folder is organized the same way, so that the
    # third entry produced by os.walk is the directory holding the slices.
    # The tree is walked once and both the path and the file list are taken
    # from that single result.
    walk_entries = list(os.walk(os.path.join(root_dir_string, folder)))
    data_dir_string = walk_entries[2][0]
    dcm_files = sorted(walk_entries[2][2])

    # Create a folder under the rotation output root with the same name as the
    # patient folder being processed
    output_folder_path = os.path.join(output_root_path, folder)
    os.makedirs(output_folder_path, exist_ok=True)

    # Read the data
    for image_file in dcm_files:

        cur_data = pydicom.dcmread(os.path.join(data_dir_string, image_file))
        cur_data_pixels = cur_data.pixel_array # extract only the picture

        # Apply random rotation about the image centre (scale factor 1);
        # cv2 expects the output size as (width, height)
        rotation_angle = random.uniform(-max_rotation_angle, max_rotation_angle)
        rotation_matrix = cv2.getRotationMatrix2D((cur_data_pixels.shape[1] / 2, cur_data_pixels.shape[0] / 2), rotation_angle, 1)
        rotated_image = cv2.warpAffine(cur_data_pixels, rotation_matrix, (cur_data_pixels.shape[1], cur_data_pixels.shape[0]))

        # Save the rotated image as a new DICOM file.
        # NOTE(review): writing raw bytes into PixelData presumes the source
        # files use an uncompressed transfer syntax - confirm for this dataset.
        rotated_ds = cur_data.copy()
        rotated_ds.PixelData = rotated_image.tobytes()
        rotated_ds.save_as(os.path.join(output_folder_path, image_file))

        print(f"Preprocessing completed for {image_file}")

### Flip Only

In [None]:
import pydicom
import numpy as np
import random
import os
import cv2
from collections import defaultdict

# Flip-only preprocessing: for every DICOM slice of every PANCREAS* patient
# folder, mirror it left-to-right with probability 0.5 and save the result
# under output_root_path/<patient folder>/<original file name>.
root_dir_string = "/Users/lukeyun/SDS323/manifest-1599750808610/Pancreas-CT" # Change as necessary
root_dir_list = sorted(os.listdir(root_dir_string))
data = defaultdict(list)
output_root_path = "/Users/lukeyun/SDS323/Jitter_Images/flip_only"

# Go through the entries in the root directory
for folder in root_dir_list:

    # Skip non-data entries
    if not folder.startswith("PANCREAS"):
        continue

    # Find data files.
    # Assumes every patient folder is organized the same way, so that the
    # third entry produced by os.walk is the directory holding the slices.
    # The tree is walked once and both the path and the file list are taken
    # from that single result.
    walk_entries = list(os.walk(os.path.join(root_dir_string, folder)))
    data_dir_string = walk_entries[2][0]
    dcm_files = sorted(walk_entries[2][2])

    # Create a folder under the flip output root with the same name as the
    # patient folder being processed
    output_folder_path = os.path.join(output_root_path, folder)
    os.makedirs(output_folder_path, exist_ok=True)

    # Read the data
    for image_file in dcm_files:

        cur_data = pydicom.dcmread(os.path.join(data_dir_string, image_file))
        cur_data_pixels = cur_data.pixel_array # extract only the picture

        # Apply random horizontal flip; unflipped slices are written back
        # with their original pixel values
        if random.random() < 0.5:
            flipped_image = np.flip(cur_data_pixels, axis=1)  # Horizontal flip
        else:
            flipped_image = cur_data_pixels

        # Save the (possibly flipped) image as a new DICOM file.
        # NOTE(review): writing raw bytes into PixelData presumes the source
        # files use an uncompressed transfer syntax - confirm for this dataset.
        flipped_ds = cur_data.copy()
        flipped_ds.PixelData = flipped_image.tobytes()
        flipped_ds.save_as(os.path.join(output_folder_path, image_file))

        print(f"Preprocessing completed for {image_file}")

### Intensity Normalization

In [None]:
# intensity normalization: preprocessing
# Intensity normalization: min-max rescale every DICOM slice of every
# PANCREAS* patient folder to the 0-255 range and save it as an 8-bit image
# under output_root_path/<patient folder>/<original file name>.

# Imported here so the cell also runs on a fresh kernel, like the other cells.
import os
from collections import defaultdict

import numpy as np
import pydicom

root_dir_string = "/Users/lukeyun/SDS323/manifest-1599750808610/Pancreas-CT" # Change as necessary
root_dir_list = sorted(os.listdir(root_dir_string))
data = defaultdict(list)
output_root_path = "/Users/lukeyun/SDS323/Normalized_Images"

# Go through the entries in the root directory
for file in root_dir_list:

    # Skip non-data entries
    if not file.startswith("PANCREAS"):
        continue

    # Find data files.
    # Assumes every patient folder is organized the same way, so that the
    # third entry produced by os.walk is the directory holding the slices.
    # The tree is walked once and both the path and the file list are taken
    # from that single result.
    walk_entries = list(os.walk(os.path.join(root_dir_string, file)))
    data_dir_string = walk_entries[2][0]
    dcm_files = sorted(walk_entries[2][2])

    # Create a folder in the normalized-images root with the same name as the
    # patient folder being processed. This must use the current loop variable
    # (`file`); the previous `folder` was a leftover from another cell and
    # sent every patient's output to one wrong directory.
    output_folder_path = os.path.join(output_root_path, file)
    os.makedirs(output_folder_path, exist_ok=True)

    # Read the data
    for image_file in dcm_files:

        cur_data = pydicom.dcmread(os.path.join(data_dir_string, image_file))
        cur_data_pixels = cur_data.pixel_array # extract only the picture

        # Perform intensity normalization in floating point so that the
        # subtraction cannot overflow the stored integer type.
        pixels_float = cur_data_pixels.astype(np.float64)
        min_val = pixels_float.min()
        max_val = pixels_float.max()
        value_range = max_val - min_val
        if value_range > 0:
            normalized_array = (pixels_float - min_val) / value_range * 255
        else:
            # A constant image has no range to stretch; emit all zeros rather
            # than dividing by zero.
            normalized_array = np.zeros_like(pixels_float)

        # Save the normalized image as a new DICOM file. The pixel buffer is
        # now unsigned 8-bit, so the header fields describing the pixel format
        # must be updated to match, otherwise the buffer length disagrees
        # with Rows * Columns * BitsAllocated / 8.
        # NOTE(review): any RescaleSlope/RescaleIntercept copied from the
        # source no longer describes these values - confirm downstream
        # readers do not apply them.
        normalized_ds = cur_data.copy()
        normalized_ds.PixelData = normalized_array.astype(np.uint8).tobytes()
        normalized_ds.BitsAllocated = 8
        normalized_ds.BitsStored = 8
        normalized_ds.HighBit = 7
        normalized_ds.PixelRepresentation = 0
        normalized_ds.save_as(os.path.join(output_folder_path, image_file))

        print(f"Intensity normalization completed for {image_file}") # TODO: include the output path in this message