This file does: 
1. environment setup
2. loading the data
3. reorienting + resizing
4. noise removal + smoothing
5. normalizing + standardizing
6. adjusting brightness + adjusting contrast + adjusting sharpness
7. thresholding
8. resampling
9. data augmentation

# Setup

In [1]:
#!pip install nilearn
#!pip install dicom2nifti
#!pip install scikit-image
#!pip install requests

import dicom2nifti
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import matplotlib.pyplot as plt
import os
import numpy as np
import statistics
import skimage.transform as skTrans

%matplotlib inline

In [2]:
data_path = r'D:\BME\MSc\dl-project\ACDC\database'
start_patient_id = 1
end_patient_id = 150

<pre>
└── dl-project
    └── ACDC
        └── database
            ├── patient001
            ├── ...
            └── patient150
</pre>

# Load data

In [3]:
#organize every filepath in a given range of patients into a list

def get_image_paths(data_path, start_patient_id, end_patient_id):

    paths = []

    for current_patient_id in range(start_patient_id, end_patient_id + 1):
        
        current_patient = {
            'start_frame': None,
            'start_frame_gt': None,
            'end_frame': None,
            'end_frame_gt': None
        }
        
        current_patient_folder = os.listdir(f'{data_path}\patient{str(current_patient_id).zfill(3)}')

        for file in current_patient_folder:
            taken = False
            if "_frame01" in file and not "_gt.nii.gz" in file and not taken:
                current_patient['start_frame'] = os.path.join(f'{data_path}\patient{str(current_patient_id).zfill(3)}', file)
                taken = True
            elif "_frame01" in file and "_gt.nii.gz" in file and not taken:
                current_patient['start_frame_gt'] = os.path.join(f'{data_path}\patient{str(current_patient_id).zfill(3)}', file)
                taken = True
            elif ".nii.gz" in file and not "_gt.nii.gz" in file and not taken:
                current_patient['end_frame'] = os.path.join(f'{data_path}\patient{str(current_patient_id).zfill(3)}', file)
                taken = True
            elif "_gt.nii.gz" in file and not taken:
                current_patient['end_frame_gt'] = os.path.join(f'{data_path}\patient{str(current_patient_id).zfill(3)}', file)
                taken = True
            elif not ".cfg" in file and not "CITATION" in file and not "4d" in file:
                print(f'Something went wrong for: {file}')
    
        paths.append(current_patient)

    return paths


image_paths = get_image_paths(data_path, start_patient_id, end_patient_id)


In [4]:
# print(len(image_paths))
# print(type(image_paths))
# print(image_paths.index("???"))

# os.listdir(os.path.join(f'{data_path}\patient{str(90).zfill(3)}'))
# print(nib.load(image_paths[89]["start_frame_gt"]).get_fdata())
# for i in image_paths:
#     print(i)
    # if i["start_frame"] is None:
    #     print("start_frame is None:")
    # elif i["start_frame_gt"] is None:
    #     print("start_frame_gt is None:")
    # elif i["end_frame"] is None:
    #     print("end_frame is None:")
    # elif i["end_frame_gt"] is None:
    #     print("end_frame_gt is None:")

In [5]:
#load data into a list of dicts of 4 elements

def load_images(image_paths):
    loaded_images = []
    for i in image_paths:
        loaded_images_of_patient = {
            'start_frame': nib.load(i["start_frame"]).get_fdata(),
            'start_frame_gt': nib.load(i["start_frame_gt"]).get_fdata(),
            'end_frame': nib.load(i["end_frame"]).get_fdata(),
            'end_frame_gt': nib.load(i["end_frame_gt"]).get_fdata()
        }
        loaded_images.append(loaded_images_of_patient)
    return loaded_images

loaded_images = load_images(image_paths)


In [None]:
type(loaded_images[4]["end_frame_gt"][:,:,0][156][154])
plt.figure(figsize=(10, 8))
plt.subplot(1, 2, 1)  # First subplot

# loaded_images[4]["end_frame_gt"][:, :, 2][loaded_images[4]["end_frame_gt"][:, :, 2] == 1.0] = 2.0
plt.imshow(loaded_images[4]["end_frame_gt"][:,:,2])
plt.subplot(1, 2, 2)  # Second subplot
plt.imshow(loaded_images[4]["end_frame"][:,:,2])
# for i in range(len(loaded_images[4]["end_frame_gt"][:,:,0])):
#     for row in loaded_images[4]["end_frame_gt"][:,:,0][i]:
#         if np.any(loaded_images[4]["end_frame_gt"][:,:,0][i] != 0):
#             print(loaded_images[4]["end_frame_gt"][:,:,0][i])

In [15]:
import nibabel as nib
import numpy as np

unique_labels_set = set()

for nifti_img in loaded_images:

    # Find unique labels in the current image and add them to the set
    unique_labels_set.update(np.unique(nifti_img["start_frame_gt"]))

# Convert the set of unique labels to a sorted list
unique_labels_list = sorted(list(unique_labels_set))

# Print the list of unique labels across all loaded images
print("Unique Labels List:", unique_labels_list)

Unique Labels List: [0.0, 1.0, 2.0, 3.0]


In [None]:
# print(len(loaded_images))
# print(type(loaded_images))
# # print(loaded_images.index("???"))
# print(len(loaded_images[0]))
# print(type(loaded_images[0]))
# print(loaded_images[0].keys())
# print(loaded_images[0].items())
# print(loaded_images[0].values())
# loaded_images[0]
# print(type(loaded_images[0]["start_frame"]))
# for i in loaded_images:
#     for j in i.values():
#         print(j.shape)

In [6]:
def run_stats(loaded_images):
    if isinstance(loaded_images[0], dict):
        shape_counts = {}
        for d in loaded_images:
            for key, value in d.items():
                if isinstance(value, np.ndarray) and value.ndim == 3:
                    shape = value.shape
                    # Use the shape as a key in the dictionary and increment the count
                    shape_counts[shape] = shape_counts.get(shape, 0) + 1

        # Sort shapes by count in descending order
        sorted_shapes = sorted(shape_counts.items(), key=lambda x: x[1], reverse=True)
        for shape, count in sorted_shapes[:5]:
            print(f"Shape {shape}: Count {count}")
        print("...")
        for shape, count in sorted_shapes[-5:]:
            print(f"Shape {shape}: Count {count}")

        total_count = sum(count for _, count in sorted_shapes)
        print(f"Total count of different shapes: {total_count}\n")

        min_values = []
        for dim in range(len(sorted_shapes[0][0])):
            min_value = min(shape[dim] for shape, _ in sorted_shapes)
            min_values.append(min_value)
        print(f"Minimum values for each dimension: {min_values}\n")

        for dim in range(len(sorted_shapes[0][0])):
            dimension_values = [shape[dim] for shape, _ in sorted_shapes]
            mode_value = statistics.mode(dimension_values)
            range_value = max(dimension_values) - min(dimension_values)
            print(f"Dimension {dim + 1}: Mode {mode_value}, Min {min(dimension_values)}, Max {max(dimension_values)}, Range {range_value}")
        total_sum_third_dim = sum(shape[2] for shape, _ in sorted_shapes)
        print(f"\nTotal sum of values in the 3rd dimension (z): {total_sum_third_dim}")
        
    elif isinstance(loaded_images[0], np.ndarray):
        shape_counts = {}
        for d in loaded_images:
            if isinstance(d, np.ndarray) and d.ndim == 3:
                shape = d.shape
                shape_counts[shape] = shape_counts.get(shape, 0) + 1

        sorted_shapes = sorted(shape_counts.items(), key=lambda x: x[1], reverse=True)
        for shape, count in sorted_shapes:
            print(f"Shape {shape}: Count {count}")

        total_count = sum(count for _, count in sorted_shapes)
        print(f"Total count of different shapes: {total_count}\n")

        print(f'Sorted shapes: {sorted_shapes}')
        # min_values = []
        # for dim in range(len(sorted_shapes)):
        #     min_value = min(shape[dim] for shape in sorted_shapes)
        #     min_values.append(min_value)
        # print(f"Minimum values for each dimension: {min_values}\n")

        # for dim in range(len(sorted_shapes)):
        #     dimension_values = [shape[dim] for shape in sorted_shapes]
        #     mode_value = statistics.mode(dimension_values)
        #     range_value = max(dimension_values) - min(dimension_values)
        #     print(f"Dimension {dim + 1}: Mode {mode_value}, Min {min(dimension_values)}, Max {max(dimension_values)}, Range {range_value}")
        
        # total_sum_third_dim = sum(shape[2] for shape, _ in sorted_shapes)
        # print(f"\nTotal sum of values in the 3rd dimension (z): {total_sum_third_dim}")

run_stats(loaded_images)

Shape (216, 256, 9): Count 56
Shape (216, 256, 10): Count 44
Shape (216, 256, 8): Count 44
Shape (256, 216, 10): Count 36
Shape (256, 216, 9): Count 28
...
Shape (174, 208, 16): Count 4
Shape (174, 208, 20): Count 4
Shape (232, 288, 15): Count 4
Shape (192, 256, 8): Count 4
Shape (214, 256, 10): Count 4
Total count of different shapes: 600

Minimum values for each dimension: [154, 154, 6]

Dimension 1: Mode 256, Min 154, Max 428, Range 274
Dimension 2: Mode 256, Min 154, Max 512, Range 358
Dimension 3: Mode 10, Min 6, Max 21, Range 15

Total sum of values in the 3rd dimension (z): 778


# Reorient + resize

In [7]:
print(type(loaded_images[0]["start_frame"]))

<class 'numpy.ndarray'>


In [None]:
# reorient

desired_orientation = nib.orientations.axcodes2ornt(('R', 'A', 'S'))
# for image_dict in loaded_images:
#     for key, image_data in image_dict.items():
#         img = nib.Nifti1Image(image_data, affine=np.eye(4))
#         original_orientation = nib.orientations.axcodes2ornt(img.affine)
        
#         if not nib.orientations.ornt_eq(original_orientation, desired_orientation):
#             # Transform the image to the desired orientation
#             img = nib.orientations.apply_orientation(img, nib.orientations.ornt_transform(original_orientation, desired_orientation))
            
#             # Update the dictionary with the reoriented ndarray
#             image_dict[key] = img.get_fdata()

unique_orientations = set()
for image_dict in loaded_images:
    # Loop through the keys in each dictionary (image1, image2, image3, image4)
    for key, image_data in image_dict.items():
        # Create a temporary NIfTI image from the ndarray
        img = nib.Nifti1Image(image_data, affine=np.eye(4))
        
        # Get the orientation codes from the affine matrix
        orientation = nib.orientations.aff2axcodes(img.affine)
        
        # Add the orientation to the set to keep only unique orientations
        unique_orientations.add(tuple(orientation))

# Convert the unique orientations back to lists for readability, if needed
unique_orientations = [list(o) for o in unique_orientations]

# Now, unique_orientations contains a list of all unique orientations in the input dictionaries
print(unique_orientations)


In [238]:
#transform every (x, y, z) voxel to "z" number of (x, y) images

more_loaded_images = []
for dict in loaded_images:
    for key, value in dict.items():
        shape = value.shape
        modified_dict = {}
        
        for k in range(shape[2]):
            sliced_array = value[:, :, k]
            modified_dict[f'array_{k+1}'] = sliced_array
        
        more_loaded_images.append({key: modified_dict})

In [248]:
print(len(more_loaded_images))
print(type(more_loaded_images))
print(type(more_loaded_images[0]))
print(type(more_loaded_images[0]["start_frame"]))
print(type(more_loaded_images[0]["start_frame"]["array_1"]))
print(more_loaded_images[0]["start_frame"]["array_1"].shape)
# print(more_loaded_images[0]["start_frame_gt"]["array_1"]) #error - még nem jó a struktúra


600
<class 'list'>
<class 'dict'>
<class 'dict'>
<class 'numpy.ndarray'>
(216, 256)


In [213]:
#Hajni:

#resize images
loaded_images_resized = []
for i in range(len(loaded_images)):
    for j in loaded_images[i].values():
        loaded_images_resized.append(skTrans.resize(j, (256,256,1), order=1, preserve_range=False, anti_aliasing=True))

# squeeze to 2d
for i in range(len(loaded_images_resized)):
    loaded_images_resized[i] = np.squeeze(loaded_images_resized[i], axis=2)

In [222]:
print(type(loaded_images_resized))
print(len(loaded_images_resized))
run_stats(loaded_images_resized)
loaded_images_resized[0].shape

<class 'list'>
600
Total count of different shapes: 0

Sorted shapes: []


(256, 256)

# Normalizing

In [11]:
from sklearn import preprocessing
from scipy import ndimage
import numpy as np

X_train = np.array([[ 1., -1.,  2.],[ 2.,  0.,  0.],[ 0.,  1., -1.]])

scaler = preprocessing.StandardScaler().fit(X_train)

X_scaled = scaler.transform(X_train)

In [13]:
print(scaler)

print(X_train)
print(scaler.mean_)
print(scaler.scale_)

print(X_scaled)
print(X_scaled.mean(axis=0))
print(X_scaled.std(axis=0))

StandardScaler()
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[1.         0.         0.33333333]
[0.81649658 0.81649658 1.24721913]
[[ 0.         -1.22474487  1.33630621]
 [ 1.22474487  0.         -0.26726124]
 [-1.22474487  1.22474487 -1.06904497]]
[0. 0. 0.]
[1. 1. 1.]


In [15]:
min_max_scaler = preprocessing.MinMaxScaler()

X_train_minmax = min_max_scaler.fit_transform(X_train)

print(X_train)
print(X_train_minmax)

[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[[0.5        0.         1.        ]
 [1.         0.5        0.33333333]
 [0.         1.         0.        ]]


In [17]:
X_normalized = preprocessing.normalize(X_train, norm='l2')

print(X_train)
print(X_normalized)

[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[[ 0.40824829 -0.40824829  0.81649658]
 [ 1.          0.          0.        ]
 [ 0.          0.70710678 -0.70710678]]


# TODO

In [None]:
%pip install reorient-nii

from 

In [None]:
# reorienting + resizing

# noise removal + smoothing
from scipy.ndimage import gaussian_filter

img = nib.load('path/to/your/image.nii')
img_data = img.get_fdata()
smoothed_img_data = gaussian_filter(img_data, sigma=1.0)
smoothed_img = nib.Nifti1Image(smoothed_img_data, affine=img.affine)
nib.save(smoothed_img, 'path/to/save/smoothed_image.nii')


# normalizing + standardizing
from sklearn.preprocessing import MinMaxScaler

img = nib.load('path/to/your/image.nii')
img_data = img.get_fdata()
scaler = MinMaxScaler()
normalized_img_data = scaler.fit_transform(img_data.reshape(-1, 1)).reshape(img_data.shape)
normalized_img = nib.Nifti1Image(normalized_img_data, affine=img.affine)
nib.save(normalized_img, 'path/to/save/normalized_image.nii')


from sklearn.preprocessing import StandardScaler
import nibabel as nib

img = nib.load('path/to/your/image.nii')
img_data = img.get_fdata()
# Reshape the data to (num_samples, num_features)
img_data_flat = img_data.reshape(-1, 1)
scaler = StandardScaler()
standardized_data = scaler.fit_transform(img_data_flat).reshape(img_data.shape)
standardized_img = nib.Nifti1Image(standardized_data, affine=img.affine)
nib.save(standardized_img, 'path/to/save/standardized_image.nii')

# adjusting brightness + adjusting contrast + adjusting sharpness

# thresholding

# resampling
import nibabel as nib
from scipy.ndimage import zoom

img = nib.load('path/to/your/image.nii')
target_voxel_size = (1.0, 1.0, 1.0)

resampling_factor = (img.header.get_zooms()[0] / target_voxel_size[0],
                     img.header.get_zooms()[1] / target_voxel_size[1],
                     img.header.get_zooms()[2] / target_voxel_size[2])

resampled_img_data = zoom(img.get_fdata(), resampling_factor, order=3)  # order=3 for cubic interpolation
resampled_img = nib.Nifti1Image(resampled_img_data, affine=None)
nib.save(resampled_img, 'path/to/save/resampled_image.nii')


# data augmentation
#pip install tensorflow
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# import numpy as np
# import nibabel as nib

# img = nib.load('path/to/your/image.nii')
# img_data = img.get_fdata()
# img_data = np.expand_dims(img_data, axis=-1)

# datagen = ImageDataGenerator(
#     rotation_range=20,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True,
#     vertical_flip=True,
#     fill_mode='nearest'
# )

# augmented_data = []
# for batch in datagen.flow(np.expand_dims(img_data, axis=0), batch_size=1):
#     augmented_data.append(batch[0, :, :, :, 0])
#     if len(augmented_data) >= 10:  # Generate 10 augmented samples
#         break

# for i, augmented_img_data in enumerate(augmented_data):
#     augmented_img = nib.Nifti1Image(augmented_img_data, affine=img.affine)
#     nib.save(augmented_img, f'path/to/save/augmented_image_{i}.nii')



# Other

In [124]:
from PIL import ImageOps, Image, ImageFilter, ImageDraw
import numpy as np
import cv2

#img = Image.fromarray(loaded_images[1]["start_frame"][:, :, 1].astype(np.uint8))
#img.show()

img = loaded_images[1]["start_frame"][:, :, 1].astype(np.uint8)
img = (img - np.min(img)) / (np.max(img) - np.min(img)) * 1
cv2.imshow('Processed Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [1]:
def plot_images(loaded_images):
    titles = ['start_frame', 'start_frame_gt', 'end_frame', 'end_frame_gt']

    for i in range(len(loaded_images)):
        plt.figure(figsize=(10, 8))
        plt.suptitle(f'Patient {i+1}')

        for j in range(4):
            plt.subplot(2, 2, j + 1)
            plt.imshow(loaded_images[i][titles[j]][:, :, 1], cmap='gray')
            plt.title(titles[j])
            plt.axis('off')




plot_images(loaded_images[:10])

NameError: name 'loaded_images' is not defined