# Pre-processing

### 1) Convert all DICOM images to .jpg format

---
The script starts here. The code below converts each .dcm file to a .jpg file.
Each .jpg file is named after the slice's instance number minus one, zero-padded to four digits (e.g. 0000.jpg).
The converted .jpg file is placed in the same input folder, and the .dcm files are then removed from the current folder.

In [None]:
import os
import cv2 as cv
import numpy as np
import pydicom as PDCM

def _first_value(value):
    """Return the first item of a multi-valued DICOM value, else the value itself."""
    if isinstance(value, (str, bytes)) or not hasattr(value, '__getitem__'):
        return value
    return value[0]


def Dicom_to_Image(Path):
    """Read one DICOM slice and render it as an 8-bit grayscale image.

    The stored pixel values are rescaled with Rescale Slope (0028,1053) and
    Rescale Intercept (0028,1052), then mapped linearly from the display
    window given by Window Center (0028,1050) and Window Width (0028,1051)
    onto 0..255. Values above the window become 255, values below become 0.

    Parameters
    ----------
    Path : str
        Path of the DICOM file to read.

    Returns
    -------
    tuple
        ``(New_Img, Instance_Number)`` where ``New_Img`` is a ``numpy.uint8``
        array with the shape of the file's pixel array and ``Instance_Number``
        is the integer read from tag (0020,0013).

    Raises
    ------
    AttributeError
        If the instance-number or window tags are absent (``get`` returns
        ``None`` for them).
    """
    DCM_Img = PDCM.dcmread(Path)

    Instance_Number = int(DCM_Img.get(0x00200013).value)  # tag (0020, 0013)

    # The window tags may hold several alternative windows; use the first one.
    Window_Center = int(_first_value(DCM_Img.get(0x00281050).value))  # tag (0028, 1050)
    Window_Width = int(_first_value(DCM_Img.get(0x00281051).value))  # tag (0028, 1051)

    Window_Max = int(Window_Center + Window_Width / 2)
    Window_Min = int(Window_Center - Window_Width / 2)

    # Slope/intercept are decimal values: truncating them with int() would
    # turn e.g. a slope of 0.5 into 0 and blank the image. Default to the
    # identity transform when the tags are absent.
    Intercept_Element = DCM_Img.get(0x00281052)
    Slope_Element = DCM_Img.get(0x00281053)
    Rescale_Intercept = 0.0 if Intercept_Element is None else float(Intercept_Element.value)
    Rescale_Slope = 1.0 if Slope_Element is None else float(Slope_Element.value)

    # Work in float64 so unsigned pixel data cannot wrap around when a
    # negative intercept is added.
    Rescaled = DCM_Img.pixel_array.astype(np.float64) * Rescale_Slope + Rescale_Intercept

    if Window_Max <= Window_Min:
        # Degenerate window: a linear ramp would divide by zero, so threshold.
        return np.where(Rescaled > Window_Max, 255, 0).astype(np.uint8), Instance_Number

    # Clipping first reproduces the "above -> 255, below -> 0" branches; the
    # uint8 cast truncates exactly like int() did in the per-pixel version.
    Clipped = np.clip(Rescaled, Window_Min, Window_Max)
    New_Img = (((Clipped - Window_Min) / (Window_Max - Window_Min)) * 255).astype(np.uint8)

    return New_Img, Instance_Number
# --------------------------------------------------

# Dataset root: each entry inside is treated as one patient folder holding the
# T1w, T1wCE, T2w and FLAIR sub-folders read by the main_* functions.
# NOTE(review): the filtering step further down reads from 'train/' instead - confirm the intended folder.
Input_Folder_path = 'train1'

def main_T1w():
    """Convert every patient's T1w DICOM slices to .jpg and delete the .dcm files.

    For each entry of ``Input_Folder_path`` the ``T1w`` sub-folder is scanned,
    every ``.dcm`` file is rendered with ``Dicom_to_Image`` and written next to
    it as ``<instance number - 1, zero-padded to 4 digits>.jpg``; the source
    ``.dcm`` files of that folder are then removed.

    Only ``.dcm`` files are converted, so re-running on an already converted
    folder is a no-op instead of feeding .jpg files to the DICOM reader.
    Entries without a ``T1w`` directory are skipped.
    """
    for file in os.listdir(Input_Folder_path):
        sub_fold = os.path.join("", file)
        print("Converting Patient No. :: ",sub_fold)
        Input_Folder = os.path.join(Input_Folder_path, sub_fold, 'T1w')
        if not os.path.isdir(Input_Folder):
            continue
        Dicom_Names = [name for name in os.listdir(Input_Folder) if name.endswith(".dcm")]
        for name in Dicom_Names:
            Output_Image, Instance_Number = Dicom_to_Image(os.path.join(Input_Folder, name))
            cv.imwrite(os.path.join(Input_Folder, str(Instance_Number - 1).zfill(4) + '.jpg'), Output_Image)
        # Clean up inside the patient loop: doing it after the loop would only
        # touch the last patient's folder.
        for name in Dicom_Names:
            os.remove(os.path.join(Input_Folder, name))
    print("Converted All T1w type Images to .jpg")
    print('Remove all .dcm files from T1w Folders')
# --------------------------------------------------------------

def main_T1wCE():
    """Convert every patient's T1wCE DICOM slices to .jpg and delete the .dcm files.

    For each entry of ``Input_Folder_path`` the ``T1wCE`` sub-folder is
    scanned, every ``.dcm`` file is rendered with ``Dicom_to_Image`` and
    written next to it as ``<instance number - 1, zero-padded to 4
    digits>.jpg``; the source ``.dcm`` files of that folder are then removed.

    Only ``.dcm`` files are converted, so re-running on an already converted
    folder is a no-op instead of feeding .jpg files to the DICOM reader.
    Entries without a ``T1wCE`` directory are skipped.
    """
    for file in os.listdir(Input_Folder_path):
        sub_fold = os.path.join("", file)
        print(sub_fold)
        Input_Folder = os.path.join(Input_Folder_path, sub_fold, 'T1wCE')
        if not os.path.isdir(Input_Folder):
            continue
        Dicom_Names = [name for name in os.listdir(Input_Folder) if name.endswith(".dcm")]
        for name in Dicom_Names:
            Output_Image, Instance_Number = Dicom_to_Image(os.path.join(Input_Folder, name))
            cv.imwrite(os.path.join(Input_Folder, str(Instance_Number - 1).zfill(4) + '.jpg'), Output_Image)
        # Clean up inside the patient loop: doing it after the loop would only
        # touch the last patient's folder.
        for name in Dicom_Names:
            os.remove(os.path.join(Input_Folder, name))
    print("Converted All T1wCE type Images to .jpg")
    print('Remove all .dcm files from T1wCE Folders')
# --------------------------------------------------------------

def main_T2w():
    """Convert every patient's T2w DICOM slices to .jpg and delete the .dcm files.

    For each entry of ``Input_Folder_path`` the ``T2w`` sub-folder is scanned,
    every ``.dcm`` file is rendered with ``Dicom_to_Image`` and written next to
    it as ``<instance number - 1, zero-padded to 4 digits>.jpg``; the source
    ``.dcm`` files of that folder are then removed.

    Only ``.dcm`` files are converted, so re-running on an already converted
    folder is a no-op instead of feeding .jpg files to the DICOM reader.
    Entries without a ``T2w`` directory are skipped.
    """
    for file in os.listdir(Input_Folder_path):
        sub_fold = os.path.join("", file)
        print(sub_fold)
        Input_Folder = os.path.join(Input_Folder_path, sub_fold, 'T2w')
        if not os.path.isdir(Input_Folder):
            continue
        Dicom_Names = [name for name in os.listdir(Input_Folder) if name.endswith(".dcm")]
        for name in Dicom_Names:
            Output_Image, Instance_Number = Dicom_to_Image(os.path.join(Input_Folder, name))
            cv.imwrite(os.path.join(Input_Folder, str(Instance_Number - 1).zfill(4) + '.jpg'), Output_Image)
        # Clean up inside the patient loop: doing it after the loop would only
        # touch the last patient's folder.
        for name in Dicom_Names:
            os.remove(os.path.join(Input_Folder, name))
    print("Converted All T2w type Images to .jpg")
    print('Remove all .dcm files from T2w Folders')
# --------------------------------------------------------------

def main_FLAIR():
    """Convert every patient's FLAIR DICOM slices to .jpg and delete the .dcm files.

    For each entry of ``Input_Folder_path`` the ``FLAIR`` sub-folder is
    scanned, every ``.dcm`` file is rendered with ``Dicom_to_Image`` and
    written next to it as ``<instance number - 1, zero-padded to 4
    digits>.jpg``; the source ``.dcm`` files of that folder are then removed.

    Only ``.dcm`` files are converted, so re-running on an already converted
    folder is a no-op instead of feeding .jpg files to the DICOM reader.
    Entries without a ``FLAIR`` directory are skipped.
    """
    for file in os.listdir(Input_Folder_path):
        sub_fold = os.path.join("", file)
        print(sub_fold)
        Input_Folder = os.path.join(Input_Folder_path, sub_fold, 'FLAIR')
        if not os.path.isdir(Input_Folder):
            continue
        Dicom_Names = [name for name in os.listdir(Input_Folder) if name.endswith(".dcm")]
        for name in Dicom_Names:
            Output_Image, Instance_Number = Dicom_to_Image(os.path.join(Input_Folder, name))
            cv.imwrite(os.path.join(Input_Folder, str(Instance_Number - 1).zfill(4) + '.jpg'), Output_Image)
        # Clean up inside the patient loop: doing it after the loop would only
        # touch the last patient's folder.
        for name in Dicom_Names:
            os.remove(os.path.join(Input_Folder, name))
    print("Converted All FLAIR type Images to .jpg")
    print('Remove all .dcm files from FLAIR Folders')
# --------------------------------------------------------------

if __name__ == "__main__":
    # Run the per-modality converters one after another: T1w, T1wCE, T2w, FLAIR.
    for convert_modality in (main_T1w, main_T1wCE, main_T2w, main_FLAIR):
        convert_modality()

Converted All T1w type Images to .jpg
Remove all .dcm files from T1w Folders
Converted All T1wCE type Images to .jpg
Remove all .dcm files from T1wCE Folders
Converted All T2w type Images to .jpg
Remove all .dcm files from T2w Folders
Converted All FLAIR type Images to .jpg
Remove all .dcm files from FLAIR Folders


# 2) Filter Highly Informative Images
For each patient, every image type contains black images and images that show little or no brain tissue.
This second script keeps the highly informative images and removes the images with little or no brain content, while paying special attention to the labels.


In [None]:
from PIL import Image
import numpy as np
import os
# Dataset root; every entry inside it is treated as one patient folder.
Input_Folder_path = 'train/'
# Images whose mean pixel value is below this threshold are deleted.
MIN_MEAN_INTENSITY = 10
for file in os.listdir(Input_Folder_path):
    sub_fold = os.path.join("", file)
    print(sub_fold)
    # Same filter for each image type, in the original order.
    for modality in ('T1w', 'T1wCE', 'T2w', 'FLAIR'):
        path = os.path.join(Input_Folder_path, sub_fold, modality)
        for filename in os.listdir(path):
            image_path = os.path.join(path, filename)
            # Close the image before deleting it: an open handle leaks a file
            # descriptor and blocks os.remove on Windows.
            with Image.open(image_path) as images:
                mean_intensity = np.mean(images)
            if mean_intensity < MIN_MEAN_INTENSITY:
                os.remove(image_path)
        print("Filter " + modality + " type images ")


Filter T1w type images 
Filter T1wCE type images 
Filter T2w type images 
Filter FLAIR type images 


# 3) Make folders for the labelled data sets
We have a CSV file that contains the label for each patient ID, so we sort each patient into a folder by label: class 1 (Methyl) or class 0 (Un-methyl).

In [None]:
import shutil
import pandas as pd
import os

In [None]:
# Load the label table; its 'MGMT_value' column holds the class (0 or 1) used to split the patients.
df = pd.read_csv('train_labels.csv')

In [None]:
# Rows of the label table with MGMT_value 0 and 1 respectively.
# NOTE: both are DataFrames (whole rows), not collections of patient IDs;
# `value in DataFrame` tests the column labels, so pull out the ID column
# before doing any membership check.
name_zero_folders = df[df['MGMT_value'] == 0]
name_one_folders = df[df['MGMT_value'] == 1]

In [None]:
# Folder whose entries (patient folders) are sorted by label.
source = "/home/ibrar/train/"
# Target folder for patients of class 0.
destination1 = '/home/ibrar/MGMT Training/zero/'
# Target folder for patients of class 1.
destination2 = '/home/ibrar/MGMT Training/one/'

In [None]:
# Snapshot of the entry names under `source`; each name is parsed as an integer patient ID below.
req = os.listdir(source)

In [None]:
# `int(r) in name_zero_folders` tested the DataFrame's column labels, so it was
# always False and nothing was moved. Compare against the patient IDs instead.
# NOTE(review): assumes the first CSV column holds the integer patient ID - confirm against train_labels.csv.
zero_ids = set(name_zero_folders.iloc[:, 0].tolist())
# The target must already be a directory; otherwise shutil.move would rename
# the first patient folder to the target path instead of moving it inside.
os.makedirs(destination1, exist_ok=True)
for r in req:
    # Skip stray entries whose name is not a numeric patient ID.
    if r.isdigit() and int(r) in zero_ids:
        print(source + '/'+ r)
        print(destination1)
        shutil.move(source + '/'+ r, destination1)

In [None]:
# `int(r) in name_one_folders` tested the DataFrame's column labels, so it was
# always False and nothing was moved. Compare against the patient IDs instead.
# NOTE(review): assumes the first CSV column holds the integer patient ID - confirm against train_labels.csv.
one_ids = set(name_one_folders.iloc[:, 0].tolist())
# The target must already be a directory; otherwise shutil.move would rename
# the first patient folder to the target path instead of moving it inside.
os.makedirs(destination2, exist_ok=True)
for r in req:
    # Skip stray entries whose name is not a numeric patient ID.
    if r.isdigit() and int(r) in one_ids:
        print(source + '/'+ r)
        print(destination2)
        shutil.move(source + '/'+ r, destination2)

### Separate T1w type images
We are going to implement the model for T1w images only in FYP-1, so the folders of the other image types are removed.

In [None]:
import shutil
import os
source = '/home/ibrar/FInal Year Project/MGMT Labeled dataset/one'


def main_T1w(source_dir=None):
    """Delete the FLAIR, T1wCE and T2w folders of every patient, keeping T1w.

    Parameters
    ----------
    source_dir : str, optional
        Folder whose entries are the patient folders. Defaults to the
        module-level ``source``.

    A modality folder that does not exist is skipped, so the function can be
    re-run on a partially cleaned dataset without raising.
    """
    root = source if source_dir is None else source_dir
    for file in os.listdir(root):
        for modality in ('FLAIR', 'T1wCE', 'T2w'):
            Input_Folder = os.path.join(root, file, modality)
            if os.path.isdir(Input_Folder):
                shutil.rmtree(Input_Folder)


if __name__ == "__main__":
    main_T1w()
        
        
source = '/home/ibrar/FInal Year Project/MGMT Labeled dataset/zero'


def main_T1w(source_dir=None):
    """Delete the FLAIR, T1wCE and T2w folders of every patient, keeping T1w.

    Parameters
    ----------
    source_dir : str, optional
        Folder whose entries are the patient folders. Defaults to the
        module-level ``source``.

    A modality folder that does not exist is skipped, so the function can be
    re-run on a partially cleaned dataset without raising.
    """
    root = source if source_dir is None else source_dir
    for file in os.listdir(root):
        for modality in ('FLAIR', 'T1wCE', 'T2w'):
            Input_Folder = os.path.join(root, file, modality)
            if os.path.isdir(Input_Folder):
                shutil.rmtree(Input_Folder)


if __name__ == "__main__":
    main_T1w()

### Convert all images to size 224*224
The dataset contains images of different sizes, so all images are resized to 224*224.

In [None]:
def resize(path):
    """Resize every regular file directly inside *path* to 224x224 and save it as JPEG.

    Each image is written to the same folder under its original base name
    with a ``.jpg`` extension (quality 90), so an existing .jpg is overwritten
    in place. Sub-directories are ignored, and a *path* that is not a
    directory is a no-op.

    Parameters
    ----------
    path : str
        Folder containing the images; a trailing slash is optional.
    """
    if not os.path.isdir(path):
        return
    # List the folder that was passed in; the previous loop iterated an
    # undefined global named `dirs`.
    for item in os.listdir(path):
        file_path = os.path.join(path, item)
        if not os.path.isfile(file_path):
            continue
        f, e = os.path.splitext(file_path)
        # Close the source handle before the result is written back.
        with Image.open(file_path) as im:
            # LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # which Pillow 10 removed.
            imResize = im.resize((224, 224), Image.LANCZOS)
        imResize.save(f + '.jpg', 'JPEG', quality=90)

source = '/home/ibrar/FInal Year Project/MGMT Labeled dataset/one'


def main_T1w():
    """Resize the T1w images of every patient folder found under ``source``."""
    for patient in os.listdir(source):
        # The trailing slash is part of the folder path handed to resize().
        t1w_folder = '/'.join((source, patient, 'T1w/'))
        print(t1w_folder)
        resize(t1w_folder)


if __name__ == "__main__":
    main_T1w()
        
        
source = '/home/ibrar/FInal Year Project/MGMT Labeled dataset/zero'


def main_T1w():
    """Resize the T1w images of every patient folder found under ``source``."""
    for patient in os.listdir(source):
        # The trailing slash is part of the folder path handed to resize().
        t1w_folder = '/'.join((source, patient, 'T1w/'))
        print(t1w_folder)
        resize(t1w_folder)


if __name__ == "__main__":
    main_T1w()