In [1]:
import pandas as pd
import numpy as np
from PIL import Image, ImageOps 
import matplotlib.pyplot as plt
import random
import cv2

In [26]:
# Load the dataset DataFrame from a pickle stored one directory above the notebook.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
df = pd.read_pickle("../Dataset_B_resized_256.pkl")

In [3]:
# Preview the first rows; the rendered output shows the columns PID, Label, Image and File.
df.head()

Unnamed: 0,PID,Label,Image,File
0,900-00-1961,Glioma II,"[[2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
1,900-00-1961,Glioma II,"[[3, 3, 1, 0, 0, 1, 2, 1, 0, 1, 0, 1, 1, 2, 2,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
2,900-00-1961,Glioma II,"[[1, 2, 5, 5, 4, 2, 2, 4, 4, 3, 0, 0, 1, 3, 2,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
3,900-00-5382,Glioma II,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
4,900-00-5382,Glioma II,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...


In [4]:
# Single seed shared by the DataFrame sampling calls (passed as random_state).
seed = 42
# Also seed the stdlib RNG: the augmentation cells draw rotation angles with
# random.randint, which would otherwise differ on every Restart & Run All.
random.seed(seed)

In [5]:
## Split the rows of df into 75% train, 15% test and 10% validation.
# NOTE(review): the split is drawn per row, not per PID, so images that share a
# PID can end up in different splits -- confirm this is intended.

train_frac = 0.75          # share of df used for training
test_frac_of_rest = 0.6    # 60% of the remaining 25% -> 15% of df for testing

# Sample the training rows, then keep everything else for test + validation.
train_DF = df.sample(frac=train_frac, random_state=seed)
test_val_DF = df.drop(index=train_DF.index)

# Sample the test rows from the remainder; what is left becomes validation.
test_DF = test_val_DF.sample(frac=test_frac_of_rest, random_state=seed)
val_DF = test_val_DF.drop(index=test_DF.index)

In [6]:
# Report each split's share of df as a percentage of rows.
for caption, part in (
    ("train_DF size: ", train_DF),
    ("val_DF size  : ", val_DF),
    ("test_DF size : ", test_DF),
):
    print(caption, len(part)/len(df)*100)

train_DF size:  74.99297555493116
val_DF size  :  10.002809778027535
test_DF size :  15.004214667041305


In [7]:
# Preview the first rows of the test split; the index values are the original row labels from df.
test_DF.head()

Unnamed: 0,PID,Label,Image,File
1139,HF1490,Glioma III,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
1715,HF1397,Glioma VI,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
154,HF1000,Glioma II,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
1676,HF1397,Glioma VI,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
2368,97461,Pituitary,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1400.mat


In [8]:
# Per-column non-null counts for the validation rows labelled 'Pituitary'.
val_DF[val_DF["Label"]=='Pituitary'].count()

PID      97
Label    97
Image    97
File     97
dtype: int64

In [9]:
# Fraction of test rows labelled 'Meningioma' (class-balance check across splits).
len(test_DF[test_DF["Label"]=='Meningioma'])/ len(test_DF)

0.21348314606741572

In [10]:
# Fraction of validation rows labelled 'Meningioma' (class-balance check across splits).
len(val_DF[val_DF["Label"]=='Meningioma'])/len(val_DF)

0.21910112359550563

In [11]:
# Fraction of training rows labelled 'Meningioma' (class-balance check across splits).
len(train_DF[train_DF["Label"]=='Meningioma'])/len(train_DF)

0.19333083551892094

In [12]:
## Augmentation:

import skimage
import skimage.transform

def mirrors(np_im):
    """Return a horizontally mirrored (left-to-right) version of an image array.

    The array is converted to a PIL image with ``Image.fromarray``, mirrored
    with ``ImageOps.mirror`` and converted back with ``np.asanyarray``.
    """
    pil_img = Image.fromarray(np_im)
    return np.asanyarray(ImageOps.mirror(pil_img))

def flips(np_im):
    """Return a vertically flipped (top-to-bottom) version of an image array.

    The array is converted to a PIL image with ``Image.fromarray``, flipped
    with ``ImageOps.flip`` and converted back with ``np.asanyarray``.
    """
    pil_img = Image.fromarray(np_im)
    return np.asanyarray(ImageOps.flip(pil_img))

def rotate(np_im, deg):
    """Rotate an image array by ``deg`` degrees without changing its shape.

    ``skimage.transform.rotate`` converts its input to float and, unless
    ``preserve_range=True`` is given, rescales integer images into [0, 1].
    That left rotated images on a different scale and dtype from the original,
    mirrored and flipped ones. The value range is now preserved and the result
    is cast back to the input dtype so every augmentation yields comparable
    arrays.

    Parameters
    ----------
    np_im : array-like
        Image to rotate.
    deg : float
        Rotation angle in degrees.

    Returns
    -------
    numpy.ndarray
        Rotated image with the same shape and dtype as the input.
    """
    src = np.asarray(np_im)
    rotated = skimage.transform.rotate(src, deg, resize=False, preserve_range=True)
    if np.issubdtype(src.dtype, np.integer):
        # Interpolation produces fractional values; round instead of truncating on the cast.
        rotated = np.rint(rotated)
    return rotated.astype(src.dtype)
    

In [13]:
# Build three augmented copies of the training split (mirrored, flipped, rotated)
# and stack them under the original rows, giving 4x the number of rows.
mirrored_df = train_DF.copy()
fliped_df = train_DF.copy()
rotated_df = train_DF.copy()

# Horizontal mirror of every image.
mirrored_df['Image'] = mirrored_df['Image'].apply(mirrors)

# Vertical flip of every image. This previously called mirrors(), which produced
# a second copy of mirrored_df instead of a flipped set.
fliped_df['Image'] = fliped_df['Image'].apply(flips)

# Rotate every image by a random whole angle in [-45, 45] degrees.
# Assigning through apply() instead of writing into iterrows() rows: pandas does
# not guarantee that writes to an iterrows() row reach the DataFrame.
rotated_df['Image'] = rotated_df['Image'].apply(
    lambda img: rotate(img, random.randint(-45, 45))
)

aug_train_DF = pd.concat([train_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index=True)

# Free the intermediate copies; only the concatenated frame is used afterwards.
del mirrored_df, fliped_df, rotated_df


In [14]:
# Build three augmented copies of the validation split (mirrored, flipped, rotated)
# and stack them under the original rows, giving 4x the number of rows.
# NOTE(review): augmenting an evaluation split is unusual -- confirm this is intended.
mirrored_df = val_DF.copy()
fliped_df = val_DF.copy()
rotated_df = val_DF.copy()

# Horizontal mirror of every image.
mirrored_df['Image'] = mirrored_df['Image'].apply(mirrors)

# Vertical flip of every image. This previously called mirrors(), which produced
# a second copy of mirrored_df instead of a flipped set.
fliped_df['Image'] = fliped_df['Image'].apply(flips)

# Rotate every image by a random whole angle in [-45, 45] degrees.
# Assigning through apply() instead of writing into iterrows() rows: pandas does
# not guarantee that writes to an iterrows() row reach the DataFrame.
rotated_df['Image'] = rotated_df['Image'].apply(
    lambda img: rotate(img, random.randint(-45, 45))
)

aug_val_DF = pd.concat([val_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index=True)

# Free the intermediate copies; only the concatenated frame is used afterwards.
del mirrored_df, fliped_df, rotated_df


In [15]:
# Build three augmented copies of the test split (mirrored, flipped, rotated)
# and stack them under the original rows, giving 4x the number of rows.
# NOTE(review): augmenting an evaluation split is unusual -- confirm this is intended.
mirrored_df = test_DF.copy()
fliped_df = test_DF.copy()
rotated_df = test_DF.copy()

# Horizontal mirror of every image.
mirrored_df['Image'] = mirrored_df['Image'].apply(mirrors)

# Vertical flip of every image. This previously called mirrors(), which produced
# a second copy of mirrored_df instead of a flipped set.
fliped_df['Image'] = fliped_df['Image'].apply(flips)

# Rotate every image by a random whole angle in [-45, 45] degrees.
# Assigning through apply() instead of writing into iterrows() rows: pandas does
# not guarantee that writes to an iterrows() row reach the DataFrame.
rotated_df['Image'] = rotated_df['Image'].apply(
    lambda img: rotate(img, random.randint(-45, 45))
)

aug_test_DF = pd.concat([test_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index=True)

# Free the intermediate copies; only the concatenated frame is used afterwards.
del mirrored_df, fliped_df, rotated_df

In [16]:
# Shuffle the augmented DataFrames so original and augmented rows are interleaved.
# random_state=seed makes the row order reproducible across runs (it was unseeded
# before); reset_index(drop=True) renumbers the rows 0..n-1 after the shuffle.
aug_train_DF = aug_train_DF.sample(frac=1, random_state=seed).reset_index(drop=True)
aug_val_DF = aug_val_DF.sample(frac=1, random_state=seed).reset_index(drop=True)
aug_test_DF = aug_test_DF.sample(frac=1, random_state=seed).reset_index(drop=True)

In [17]:
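# Disabled export: these filenames carry no image-size suffix. The active exports
# further down write each split with SIZE_X in the filename.
# aug_train_DF.to_pickle("aug_train_DF.pkl")
# val_DF.to_pickle("val_DF.pkl")
# test_DF.to_pickle("test_DF.pkl")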

In [18]:
# Target image width (SIZE_X) and height (SIZE_Y) read by imgResize.
# SIZE_X is also embedded in the names of the exported pickle files.
SIZE_X = 128 
SIZE_Y = 128

In [19]:
def imgResize(img):
    """Scale a 2-D image to fit inside a SIZE_X x SIZE_Y canvas, centred.

    The image is scaled uniformly by the largest factor that still fits both
    target dimensions, then translated so it is centred. The mapping is applied
    with ``cv2.warpAffine`` using a canvas pre-filled with 255 as ``dst`` and
    ``cv2.BORDER_TRANSPARENT`` as the border mode.

    ``img`` must be two-dimensional, because its shape is unpacked as
    ``(height, width)``.
    """
    target_w, target_h = SIZE_X, SIZE_Y
    src_h, src_w = img.shape

    # Uniform scale factor: the tighter of the two axis ratios.
    scale = min(target_w / src_w, target_h / src_h)

    # Offsets that centre the scaled image on the canvas.
    shift_x = (target_w - src_w * scale) / 2
    shift_y = (target_h - src_h * scale) / 2

    # 2x3 affine matrix: scale, then translate.
    affine = np.float32([[scale, 0, shift_x], [0, scale, shift_y]])
    canvas = np.full([target_h, target_w], 255.0)
    return cv2.warpAffine(
        img,
        affine,
        dsize=(target_w, target_h),
        dst=canvas,
        borderMode=cv2.BORDER_TRANSPARENT,
    )

In [20]:
# Resize every image in the augmented splits to SIZE_X x SIZE_Y, in place.
for frame in (aug_train_DF, aug_val_DF, aug_test_DF):
    frame["Image"] = frame["Image"].apply(imgResize)

In [21]:
# Resize every image in the non-augmented splits to SIZE_X x SIZE_Y, in place.
for frame in (train_DF, val_DF, test_DF):
    frame["Image"] = frame["Image"].apply(imgResize)

In [22]:
# Write the resized, non-augmented splits to pickles named after the image size.
for frame, template in (
    (train_DF, "train_DF_{}.pkl"),
    (val_DF, "val_DF_{}.pkl"),
    (test_DF, "test_DF_{}.pkl"),
):
    frame.to_pickle(template.format(SIZE_X))

In [23]:
# Write the resized, augmented splits to pickles named after the image size.
for frame, template in (
    (aug_train_DF, "aug_train_DF_{}.pkl"),
    (aug_val_DF, "aug_val_DF_{}.pkl"),
    (aug_test_DF, "aug_test_DF_{}.pkl"),
):
    frame.to_pickle(template.format(SIZE_X))

In [24]:
# Preview the first rows of the shuffled, resized, augmented training split.
aug_train_DF.head()

Unnamed: 0,PID,Label,Image,File
0,HF1397,Glioma VI,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
1,107495,Pituitary,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1180.mat
2,HF1156,Glioma II,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
3,105936,Pituitary,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",710.mat
4,HF0966,Glioma III,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...


In [25]:
# Report each augmented split as a fraction of 4x the original row count
# (each split holds the original rows plus three augmented copies).
for caption, part in (
    ("aug_train_DF size: ", aug_train_DF),
    ("aug_val_DF size  : ", aug_val_DF),
    ("aug_test_DF size : ", aug_test_DF),
):
    print(caption, len(part)/(len(df)*4))

aug_train_DF size:  0.7499297555493116
aug_val_DF size  :  0.10002809778027535
aug_test_DF size :  0.15004214667041305
