In [1]:
import pandas as pd
import numpy as np
from PIL import Image, ImageOps 
import matplotlib.pyplot as plt
import random
import cv2

In [2]:
df = pd.read_pickle("../Dataset_A_resized_256.pkl")

In [3]:
df.head()

Unnamed: 0,PID,Images,Tumor Mask,Tumor Border,Labels,File name
0,100360,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[267.6152450090744, 231.37568058076226, 277.83...",Meningioma,1.mat
1,101016,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[248.86411149825784, 256.89198606271776, 238.1...",Meningioma,10.mat
2,107494,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[193.26370732550265, 175.8076305348121, 185.15...",Meningioma,100.mat
3,112649,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[225.95188511210213, 231.1718956592464, 233.40...",Pituitary,1000.mat
4,112649,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[231.61100841767436, 237.4012836281162, 241.02...",Pituitary,1001.mat


In [4]:
df = df.loc[:, ["PID", "Images", "Tumor Mask", "Labels"]]

In [5]:
seed = 42

In [6]:
## Splitting Data to : 75% Train set, 15% Test set, and 10% Validation set

train_DF = df.sample(frac=0.75, random_state=seed) #random state is a seed value
test_val_DF  = df.drop(train_DF.index)

test_DF = test_val_DF.sample(frac=0.6, random_state=seed) #random state is a seed value
val_DF = test_val_DF.drop(test_DF.index)

In [7]:
print("train_DF size: ", len(train_DF)/len(df)*100)
print("val_DF size  : ", len(val_DF)/len(df)*100)
print("test_DF size : ", len(test_DF)/len(df)*100)

train_DF size:  75.0
val_DF size  :  9.986945169712794
test_DF size :  15.013054830287206


In [8]:
test_DF.head()

Unnamed: 0,PID,Images,Tumor Mask,Labels
1696,MR017260F,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Glioma
2653,113435,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Meningioma
1408,MR049358,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Glioma
2300,106720,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Meningioma
1348,MR040240B,"[[1.5259021896696422e-05, 1.5259021896696422e-...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Glioma


In [9]:
val_DF[val_DF["Labels"]=='Pituitary'].count()

PID           90
Images        90
Tumor Mask    90
Labels        90
dtype: int64

In [10]:
len(test_DF[test_DF["Labels"]=='Meningioma'])/ len(test_DF)

0.21956521739130436

In [11]:
len(val_DF[val_DF["Labels"]=='Meningioma'])/len(val_DF)

0.2222222222222222

In [12]:
len(train_DF[train_DF["Labels"]=='Meningioma'])/len(train_DF)

0.23455178416013925

In [13]:
## Augmentation:

import skimage
import skimage.transform

def mirrors(np_im):
    np_im = Image.fromarray(np_im)
    im_agu = ImageOps.mirror(np_im)
    return np.asanyarray(im_agu)

## flips an image 
def flips(np_im):
    np_im = Image.fromarray(np_im)
    im_agu = ImageOps.flip(np_im)
    return np.asanyarray(im_agu)

def rotate(np_im, deg):
    return skimage.transform.rotate(np_im, deg, resize=False)
    

In [14]:
img = df["Images"][0]
mask = df["Tumor Mask"][0]

In [15]:
mirrored_df  = train_DF.copy()
fliped_df    = train_DF.copy()
rotated_df   = train_DF.copy()

mirrored_df['Images'] = mirrored_df['Images'].apply(mirrors)
mirrored_df['Tumor Mask'] = mirrored_df['Tumor Mask'].apply(mirrors)

fliped_df['Images'] = fliped_df['Images'].apply(mirrors)
fliped_df['Tumor Mask'] = fliped_df['Tumor Mask'].apply(mirrors)


for index, row in rotated_df.iterrows():
    deg = random.randint(-45 ,45)
    row["Images"] = rotate(row["Images"], deg)
    row["Tumor Mask"] = np.round(rotate(row["Tumor Mask"], deg))

aug_train_DF = pd.concat([train_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index = True)
del mirrored_df
del fliped_df
del rotated_df


In [16]:
mirrored_df  = val_DF.copy()
fliped_df    = val_DF.copy()
rotated_df   = val_DF.copy()

mirrored_df['Images'] = mirrored_df['Images'].apply(mirrors)
mirrored_df['Tumor Mask'] = mirrored_df['Tumor Mask'].apply(mirrors)

fliped_df['Images'] = fliped_df['Images'].apply(mirrors)
fliped_df['Tumor Mask'] = fliped_df['Tumor Mask'].apply(mirrors)


for index, row in rotated_df.iterrows():
    deg = random.randint(-45 ,45)
    row["Images"] = rotate(row["Images"], deg)
    row["Tumor Mask"] = np.round(rotate(row["Tumor Mask"], deg))

aug_val_DF = pd.concat([val_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index = True)
del mirrored_df
del fliped_df
del rotated_df


In [17]:
mirrored_df  = test_DF.copy()
fliped_df    = test_DF.copy()
rotated_df   = test_DF.copy()

mirrored_df['Images'] = mirrored_df['Images'].apply(mirrors)
mirrored_df['Tumor Mask'] = mirrored_df['Tumor Mask'].apply(mirrors)

fliped_df['Images'] = fliped_df['Images'].apply(mirrors)
fliped_df['Tumor Mask'] = fliped_df['Tumor Mask'].apply(mirrors)


for index, row in rotated_df.iterrows():
    deg = random.randint(-45 ,45)
    row["Images"] = rotate(row["Images"], deg)
    row["Tumor Mask"] = np.round(rotate(row["Tumor Mask"], deg))

aug_test_DF = pd.concat([test_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index = True)
del mirrored_df
del fliped_df
del rotated_df


In [18]:
#shuffle agumented dataframe
aug_train_DF = aug_train_DF.sample(frac=1).reset_index(drop=True) #Use if you want to reset index order
aug_val_DF = aug_val_DF.sample(frac=1).reset_index(drop=True) #Use if you want to reset index order
aug_test_DF = aug_test_DF.sample(frac=1).reset_index(drop=True) #Use if you want to reset index order

In [19]:
# aug_train_DF.to_pickle("aug_train_DF.pkl")
# val_DF.to_pickle("val_DF.pkl")
# test_DF.to_pickle("test_DF.pkl")

In [20]:
SIZE_X = 128 
SIZE_Y = 128

In [21]:
def imgResize(img):
    wt, ht = SIZE_X, SIZE_Y
    h, w = img.shape
    f = min(wt / w, ht / h)
    tx = (wt - w * f) / 2
    ty = (ht - h * f) / 2

    # map image into target image
    M = np.float32([[f, 0, tx], [0, f, ty]])
    target = np.ones([ht, wt]) * 255
    img = cv2.warpAffine(img, M, dsize=(wt, ht), dst=target, borderMode=cv2.BORDER_TRANSPARENT)
    return img

In [22]:
aug_train_DF["Images"] = aug_train_DF["Images"].apply(imgResize)
aug_train_DF["Tumor Mask"] = aug_train_DF["Tumor Mask"].apply(imgResize)

In [23]:
aug_val_DF["Images"] = aug_val_DF["Images"].apply(imgResize)
aug_val_DF["Tumor Mask"] = aug_val_DF["Tumor Mask"].apply(imgResize)

In [24]:
aug_test_DF["Images"] = aug_test_DF["Images"].apply(imgResize)
aug_test_DF["Tumor Mask"] = aug_test_DF["Tumor Mask"].apply(imgResize)

In [25]:
train_DF["Images"] = train_DF["Images"].apply(imgResize)
train_DF["Tumor Mask"] = train_DF["Tumor Mask"].apply(imgResize)

In [26]:
val_DF["Images"] = val_DF["Images"].apply(imgResize)
val_DF["Tumor Mask"] = val_DF["Tumor Mask"].apply(imgResize)

In [27]:
test_DF["Images"] = test_DF["Images"].apply(imgResize)
test_DF["Tumor Mask"] = test_DF["Tumor Mask"].apply(imgResize)

In [28]:
np.unique(aug_train_DF["Tumor Mask"].tolist())

array([0., 1.])

In [29]:
np.unique(aug_val_DF["Tumor Mask"].tolist())

array([0., 1.])

In [30]:
np.unique(aug_test_DF["Tumor Mask"].tolist())

array([0., 1.])

In [31]:
train_DF.to_pickle("train_DF_{}.pkl".format(SIZE_X))
val_DF.to_pickle("val_DF_{}.pkl".format(SIZE_X))
test_DF.to_pickle("test_DF_{}.pkl".format(SIZE_X))

In [32]:
aug_train_DF.to_pickle("aug_train_DF_{}.pkl".format(SIZE_X))
aug_val_DF.to_pickle("aug_val_DF_{}.pkl".format(SIZE_X))
aug_test_DF.to_pickle("aug_test_DF_{}.pkl".format(SIZE_X))

In [33]:
aug_train_DF.head()

Unnamed: 0,PID,Images,Tumor Mask,Labels
0,101016,"[[1.5259022e-05, 1.5259022e-05, 1.5259022e-05,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Meningioma
1,100820,"[[1.5259022e-05, 1.5259022e-05, 1.5259022e-05,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Glioma
2,103670,"[[1.5259022e-05, 1.5259022e-05, 1.5259022e-05,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Pituitary
3,103671,"[[1.5259022e-05, 4.5777066e-05, 4.5777066e-05,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Pituitary
4,105374,"[[1.5259022e-05, 1.5259022e-05, 1.5259022e-05,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",Meningioma


In [37]:
print("aug_train_DF size: ", len(aug_train_DF)/(len(df)*4))
print("aug_val_DF size  : ", len(aug_val_DF)/(len(df)*4))
print("aug_test_DF size : ", len(aug_test_DF)/(len(df)*4))

aug_train_DF size:  0.75
aug_val_DF size  :  0.09986945169712794
aug_test_DF size :  0.15013054830287206
