In [2]:
import os
%pwd

'/home/towet/Desktop/Visions/tumors/Multimodal-Brain-Tumor-Segmentation/research'

In [3]:
# Step up from the notebook's `research/` folder to the project root.
# Guarded on the current folder name so that re-running this cell does not
# climb one more directory each time (which would break every relative path
# used further down).
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

In [4]:
%pwd

'/home/towet/Desktop/Visions/tumors/Multimodal-Brain-Tumor-Segmentation'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataPreprocessingConfig:
    """Read-only set of filesystem locations used by the preprocessing stage."""

    # Working directory of the preprocessing stage.
    root_dir: Path
    # Folder that receives the stacked image volumes (`image_<n>.npy`).
    img_dir: Path
    # Folder that receives the one-hot encoded masks (`mask_<n>.npy`).
    mask_dir: Path
    # Folder searched (one level of sub-folders) for the raw `.nii` scans.
    dataset: Path
    # Output folder handed to the train/validation split.
    splited_dataset: Path
    

In [6]:
from tumorsegmentation.constants import *
from tumorsegmentation.utils.common import read_yaml, create_directories
import os

In [12]:
class ConfigurationManager:
    """Loads the project YAML files and builds per-stage configuration objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_preprocessing_config(self) -> DataPreprocessingConfig:
        """Create the preprocessing directories and return their configuration.

        The root, image, mask and split directories named in the
        ``data_preprocessing`` section of the config are created as a side
        effect. The raw dataset location is a fixed path relative to the
        current working directory.

        Returns:
            DataPreprocessingConfig whose fields are all ``Path`` objects,
            matching the annotations on the dataclass.
        """
        config = self.config.data_preprocessing
        create_directories([config.root_dir])
        create_directories([config.img_dir, config.mask_dir, config.splited_dataset])

        dataset = Path(
            "artifacts",
            "data_ingestion",
            "raw_dataset",
            "BraTS20Dataset",
            "BraTS2020_TrainingData",
            "MICCAI_BraTS2020_TrainingData",
        )

        # The YAML values arrive as plain strings; wrap them so every field
        # really is a Path, as the dataclass declares.
        return DataPreprocessingConfig(
            root_dir=Path(config.root_dir),
            img_dir=Path(config.img_dir),
            mask_dir=Path(config.mask_dir),
            dataset=dataset,
            splited_dataset=Path(config.splited_dataset),
        )


In [9]:
import tensorflow as tf
import os
import nibabel as nib 
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import splitfolders
from tensorflow.keras.utils import to_categorical
import glob

In [25]:
class DataPreprocessing:
    """Converts raw NIfTI scans into cropped ``.npy`` volumes and splits them.

    ``train_data_preprocessing`` writes one stacked image volume and one
    one-hot mask per case; ``train_val_split`` then copies those files into
    train/val folders.
    """

    # Crop window (x, y, z) giving a 128 x 128 x 128 volume, a size divisible
    # by 64 so that 64x64x64 patches can be extracted later.
    _CROP = (slice(56, 184), slice(56, 184), slice(13, 141))
    # A cropped mask must have more than this fraction of non-zero labels.
    _MIN_LABELLED_FRACTION = 0.01
    # Number of mask classes after label 4 has been folded into 3.
    _NUM_CLASSES = 4

    # The annotation is quoted so that defining this class does not depend on
    # the cell that defines DataPreprocessingConfig having been run first.
    def __init__(self, config: "DataPreprocessingConfig"):
        """Store the preprocessing configuration (paths) for later use."""
        self.config = config

    def _find_scans(self, suffix):
        """Return the sorted ``*<suffix>.nii`` files one folder below the dataset."""
        return sorted(glob.glob(f"{self.config.dataset}/*/*{suffix}.nii"))

    @staticmethod
    def _load_scaled_volume(path, scaler):
        """Load one NIfTI volume and rescale it with ``scaler``.

        The volume is flattened to 2-D with its last axis kept as columns,
        passed through ``scaler.fit_transform`` (MinMaxScaler works
        column-wise) and reshaped back to its original shape.
        """
        volume = nib.load(path).get_fdata()
        flat = volume.reshape(-1, volume.shape[-1])
        return scaler.fit_transform(flat).reshape(volume.shape)

    @staticmethod
    def _labelled_fraction(mask):
        """Return the fraction of voxels in ``mask`` whose label is not 0.

        Counting non-zero voxels directly stays correct when label 0 does not
        occur at all, and an empty mask yields 0.0 instead of failing.
        """
        if mask.size == 0:
            return 0.0
        return np.count_nonzero(mask) / mask.size

    def _split_input_dir(self):
        """Return the folder that contains both the image and the mask folder.

        Raises:
            ValueError: If ``img_dir`` and ``mask_dir`` do not share a parent,
                because the split expects them as sibling sub-folders.
        """
        img_parent = os.path.dirname(os.path.normpath(str(self.config.img_dir)))
        mask_parent = os.path.dirname(os.path.normpath(str(self.config.mask_dir)))
        if img_parent != mask_parent:
            raise ValueError(
                "img_dir and mask_dir must be sub-folders of the same directory, "
                f"got {self.config.img_dir!r} and {self.config.mask_dir!r}"
            )
        # An empty dirname means the folders sit directly in the working directory.
        return img_parent or "."

    def train_data_preprocessing(self):
        """Build and save one image/mask ``.npy`` pair per usable case.

        For every case the t2, t1ce and flair volumes are scaled, stacked on a
        new last axis (flair, t1ce, t2) and cropped; the mask is cast to
        uint8, label 4 is remapped to 3 and the same crop is applied. A case
        is saved only when more than 1% of its cropped mask is labelled.

        Raises:
            ValueError: If the numbers of t2, t1ce, flair and seg files differ,
                since the files are paired by their sorted position.
        """
        self.t2_list = self._find_scans("t2")
        self.t1ce_list = self._find_scans("t1ce")
        self.flair_list = self._find_scans("flair")
        self.mask_list = self._find_scans("seg")

        # Files are matched by index, so a missing or misnamed file would
        # silently pair images with the wrong mask. Fail loudly instead.
        found = {
            "t2": len(self.t2_list),
            "t1ce": len(self.t1ce_list),
            "flair": len(self.flair_list),
            "seg": len(self.mask_list),
        }
        if len(set(found.values())) != 1:
            raise ValueError(
                f"Expected the same number of files for every modality and mask, found {found}"
            )

        scaler = MinMaxScaler()

        cases = zip(self.t2_list, self.t1ce_list, self.flair_list, self.mask_list)
        for img, (t2_path, t1ce_path, flair_path, mask_path) in enumerate(cases):
            print("Now preparing image and masks number: ", img)

            temp_image_t2 = self._load_scaled_volume(t2_path, scaler)
            temp_image_t1ce = self._load_scaled_volume(t1ce_path, scaler)
            temp_image_flair = self._load_scaled_volume(flair_path, scaler)

            temp_mask = nib.load(mask_path).get_fdata().astype(np.uint8)
            temp_mask[temp_mask == 4] = 3  # Reassign mask values 4 to 3

            temp_combined_images = np.stack(
                [temp_image_flair, temp_image_t1ce, temp_image_t2], axis=3
            )

            # Same x, y, z crop for images and mask; the channel axis is untouched.
            temp_combined_images = temp_combined_images[self._CROP]
            temp_mask = temp_mask[self._CROP]

            # Keep only volumes with more than 1% of voxels labelled non-zero.
            if self._labelled_fraction(temp_mask) > self._MIN_LABELLED_FRACTION:
                print("Save Me")
                temp_mask = to_categorical(temp_mask, num_classes=self._NUM_CLASSES)
                np.save(f"{self.config.img_dir}/image_{img}.npy", temp_combined_images)
                np.save(f"{self.config.mask_dir}/mask_{img}.npy", temp_mask)
            else:
                print("I am useless")

    def train_val_split(self):
        """Split the saved volumes 80/20 into the configured output folder.

        The input folder is the shared parent of ``img_dir`` and ``mask_dir``
        rather than a machine-specific absolute path.

        Raises:
            ValueError: If ``img_dir`` and ``mask_dir`` have different parents.
        """
        input_folder = self._split_input_dir()
        output_folder = self.config.splited_dataset
        splitfolders.ratio(input_folder, output_folder, seed=42, ratio=(.8, 0.2), group_prefix=None)




In [26]:
# Run the preprocessing stage end to end: build the configuration, convert
# the raw scans into .npy volumes, then split them into train/val folders.
# Any exception propagates to the notebook with its original traceback.
config = ConfigurationManager()
data_preprocessing_config = config.get_data_preprocessing_config()
data_preprocessing = DataPreprocessing(config=data_preprocessing_config)
data_preprocessing.train_data_preprocessing()
data_preprocessing.train_val_split()

[2023-11-25 10:19:07,640: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-11-25 10:19:07,644: INFO: common: yaml file: params.yaml loaded successfully]
[2023-11-25 10:19:07,647: INFO: common: created directory at: artifacts]
[2023-11-25 10:19:07,650: INFO: common: created directory at: artifacts/preprocessed_data]
[2023-11-25 10:19:07,652: INFO: common: created directory at: artifacts/preprocessed_data/dataset/images]
[2023-11-25 10:19:07,654: INFO: common: created directory at: artifacts/preprocessed_data/dataset/masks]
[2023-11-25 10:19:07,656: INFO: common: created directory at: artifacts/preprocessed_data/train_val_dataset]







[A[A[A[A[A






[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A




[A[A[A[A[A