In [26]:
import os
import numpy as np
import nibabel
import random
import shutil

In [27]:
# Dataset storage location
data_path = 'brats2020/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData'

# Location to save preprocessed data
save_path = 'anomaly_brats'

if not os.path.exists(save_path):
    os.mkdir(save_path)

dir_list = os.listdir(data_path)

modalities = ['t1', 't1ce', 't2', 'flair', 'seg']

In [28]:
def preprocess():
    num = 1

    for index, dir in enumerate(dir_list):
        
        # Skip if the directory contains this file (Brats2021 dataset specific)
        print(dir)

        if dir == '.DS_Store':
            continue

        print(f"{index} / {len(dir_list)}")

        patient_path = os.path.join(data_path, dir)

        model_data = {}
        for model in modalities:
            filename = dir + "_" + model + ".nii"
            file_path = os.path.join(patient_path, filename)
            data = nibabel.load(file_path).get_fdata()
            model_data[model] = data

        for i in range(80, 129):
            file_num = str(num).zfill(6)
            save_slice_path = os.path.join(save_path, file_num)
            if not os.path.exists(save_slice_path):
                os.mkdir(save_slice_path)

            for model in modalities:
                file_name = dir + "_" + model + "_" + str(i).zfill(3) + ".nii.gz"

                save_model_path = os.path.join(save_slice_path, file_name)

                if model == 'seg':
                    # Map label values: 0, 1, 2, 4 to 0, 1, 2, 3
                    label = model_data[model][..., i]
                    label[label == 4] = 3
                    label = nibabel.Nifti1Image(label, affine=np.eye(4))
                    nibabel.save(label, save_model_path)
                else:
                    img_data = model_data[model]
                    x = img_data[..., i] - np.nanmin(img_data[..., i])
                    y = np.nanmax(img_data[..., i]) - np.nanmin(img_data[..., i])
                    y = y if y != 0 else 1.0
                    img = x / y  # (240, 240)

                    if img.max() > 1.0 or img.min() < 0:
                        print(f"--Error: {num} --")

                    img = nibabel.Nifti1Image(img, affine=np.eye(4))
                    nibabel.save(img, save_model_path)
            num += 1

In [32]:
def split_data():
    # Set paths and directory names
    training_path = os.path.join(save_path, "training")
    testing_path = os.path.join(save_path, "testing")

    # Create directories for training and testing sets
    os.makedirs(training_path, exist_ok=True)
    os.makedirs(testing_path, exist_ok=True)

    # Get list of files
    dir_list = os.listdir(save_path)

    # Shuffle the list of files
    random.shuffle(dir_list)

    # Calculate the number of samples for training and testing sets
    total_samples = len(dir_list)
    
    # Exclude the 'training' and 'testing' directories created earlier
    total_samples = total_samples - 2
    train_samples = int(0.9 * total_samples)
    test_samples = total_samples - train_samples

    train_health_num = 0
    test_health_num = 0

    for i, dir_name in enumerate(dir_list):
        print(f"{i} / {total_samples}")

        if dir_name == 'training' or dir_name == 'testing':
            continue

        source_dir_path = os.path.join(save_path, dir_name)

        if i < train_samples:
            file = os.listdir(source_dir_path)

            # Extract label to determine health status
            seg_files = [file_name for file_name in file if "seg" in file_name]

            if len(seg_files) == 0:
                print("---")
            seg_file = os.path.join(source_dir_path, seg_files[0])
            image = nibabel.load(seg_file).get_fdata()

            if image.max() == 0:
                train_health_num += 1

            destination_dir_path = os.path.join(training_path, dir_name)

        else:
            file = os.listdir(source_dir_path)

            seg_files = [file_name for file_name in file if "seg" in file_name]
            if len(seg_files) == 0:
                print("---")
            seg_file = os.path.join(source_dir_path, seg_files[0])
            image = nibabel.load(seg_file).get_fdata()
            if image.max() == 0:
                test_health_num += 1
            destination_dir_path = os.path.join(testing_path, dir_name)

            # Extract 'seg' for test_labels separately
            # ... (write your code here)

        # Move directories
        shutil.move(source_dir_path, destination_dir_path)

    print(f"Training set: Healthy {train_health_num}, Abnormal {train_samples - train_health_num}, Total {train_samples}")
    print(f"Testing set: Healthy {test_health_num}, Abnormal {test_samples - test_health_num}, Total {test_samples}")

In [30]:
preprocess()

BraTS20_Training_082
0 / 370
BraTS20_Training_244
1 / 370
BraTS20_Training_076
2 / 370
BraTS20_Training_049
3 / 370
BraTS20_Training_071
4 / 370
BraTS20_Training_243
5 / 370
BraTS20_Training_085
6 / 370
BraTS20_Training_288
7 / 370
BraTS20_Training_047
8 / 370
BraTS20_Training_275
9 / 370
BraTS20_Training_281
10 / 370
BraTS20_Training_078
11 / 370
BraTS20_Training_286
12 / 370
BraTS20_Training_272
13 / 370
BraTS20_Training_040
14 / 370
BraTS20_Training_219
15 / 370
BraTS20_Training_014
16 / 370
BraTS20_Training_226
17 / 370
BraTS20_Training_221
18 / 370
BraTS20_Training_013
19 / 370
BraTS20_Training_228
20 / 370
BraTS20_Training_217
21 / 370
BraTS20_Training_025
22 / 370
BraTS20_Training_022
23 / 370
BraTS20_Training_210
24 / 370
BraTS20_Training_041
25 / 370
BraTS20_Training_273
26 / 370
BraTS20_Training_287
27 / 370
BraTS20_Training_280
28 / 370
BraTS20_Training_274
29 / 370
BraTS20_Training_046
30 / 370
BraTS20_Training_079
31 / 370
BraTS20_Training_084
32 / 370
BraTS20_Training_242

In [33]:
split_data()

0 / 18081
1 / 18081
2 / 18081
3 / 18081
4 / 18081
5 / 18081
6 / 18081
7 / 18081
8 / 18081
9 / 18081
10 / 18081
11 / 18081
12 / 18081
13 / 18081
14 / 18081
15 / 18081
16 / 18081
17 / 18081
18 / 18081
19 / 18081
20 / 18081
21 / 18081
22 / 18081
23 / 18081
24 / 18081
25 / 18081
26 / 18081
27 / 18081
28 / 18081
29 / 18081
30 / 18081
31 / 18081
32 / 18081
33 / 18081
34 / 18081
35 / 18081
36 / 18081
37 / 18081
38 / 18081
39 / 18081
40 / 18081
41 / 18081
42 / 18081
43 / 18081
44 / 18081
45 / 18081
46 / 18081
47 / 18081
48 / 18081
49 / 18081
50 / 18081
51 / 18081
52 / 18081
53 / 18081
54 / 18081
55 / 18081
56 / 18081
57 / 18081
58 / 18081
59 / 18081
60 / 18081
61 / 18081
62 / 18081
63 / 18081
64 / 18081
65 / 18081
66 / 18081
67 / 18081
68 / 18081
69 / 18081
70 / 18081
71 / 18081
72 / 18081
73 / 18081
74 / 18081
75 / 18081
76 / 18081
77 / 18081
78 / 18081
79 / 18081
80 / 18081
81 / 18081
82 / 18081
83 / 18081
84 / 18081
85 / 18081
86 / 18081
87 / 18081
88 / 18081
89 / 18081
90 / 18081
91 / 1808