In [31]:
import pandas as pd
import numpy as np
from PIL import Image, ImageOps 
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import random
import cv2
import seaborn as sns
import os

In [2]:
tumor_df = pd.read_pickle("../Dataset_B_resized_256.pkl")
healthy_df = pd.read_pickle(r"./Healthy_dataset.pkl")
df = pd.concat([tumor_df, healthy_df])

In [3]:
df.head()

Unnamed: 0,PID,Label,Image,File
0,900-00-1961,Glioma II,"[[2, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
1,900-00-1961,Glioma II,"[[3, 3, 1, 0, 0, 1, 2, 1, 0, 1, 0, 1, 1, 2, 2,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
2,900-00-1961,Glioma II,"[[1, 2, 5, 5, 4, 2, 2, 4, 4, 3, 0, 0, 1, 3, 2,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
3,900-00-5382,Glioma II,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
4,900-00-5382,Glioma II,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...


In [4]:
df.Label.unique()

array(['Glioma II', 'Glioma III', 'Glioma VI', 'Meningioma', 'Pituitary',
       'Healthy'], dtype=object)

In [5]:
seed = 42

In [6]:
## Splitting Data to : 75% Train set, 15% Test set, and 10% Validation set

train_DF = df.sample(frac=0.75, random_state=seed) #random state is a seed value
test_val_DF  = df.drop(train_DF.index)

test_DF = test_val_DF.sample(frac=0.6, random_state=seed) #random state is a seed value
val_DF = test_val_DF.drop(test_DF.index)

In [7]:
print("train_DF size: ", len(train_DF)/len(df)*100)
print("val_DF size  : ", len(val_DF)/len(df)*100)
print("test_DF size : ", len(test_DF)/len(df)*100)

train_DF size:  75.0053728777133
val_DF size  :  5.974640017193209
test_DF size :  10.251450676982591


In [8]:
test_DF.head()

Unnamed: 0,PID,Label,Image,File
3482,107248,Meningioma,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",93.mat
3374,106284,Meningioma,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",65.mat
853,HF1185,Glioma III,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...
2895,100572,Meningioma,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",220.mat
880,HF1185,Glioma III,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...


In [9]:
val_DF[val_DF["Label"]=='Pituitary'].count()

PID      95
Label    95
Image    95
File     95
dtype: int64

In [10]:
len(test_DF[test_DF["Label"]=='Meningioma'])/ len(test_DF)

0.22641509433962265

In [11]:
len(val_DF[val_DF["Label"]=='Meningioma'])/len(val_DF)

0.2949640287769784

In [12]:
len(train_DF[train_DF["Label"]=='Meningioma'])/len(train_DF)

0.1484240687679083

In [13]:
## Augmentation:

import skimage
import skimage.transform

def mirrors(np_im):
    np_im = Image.fromarray(np_im)
    im_agu = ImageOps.mirror(np_im)
    return np.asanyarray(im_agu)

## flips an image 
def flips(np_im):
    np_im = Image.fromarray(np_im)
    im_agu = ImageOps.flip(np_im)
    return np.asanyarray(im_agu)

def rotate(np_im, deg):
    return skimage.transform.rotate(np_im, deg, resize=False)
    

In [14]:
mirrored_df  = train_DF.copy()
fliped_df    = train_DF.copy()
rotated_df   = train_DF.copy()

mirrored_df['Image'] = mirrored_df['Image'].apply(mirrors)

fliped_df['Image'] = fliped_df['Image'].apply(mirrors)

for index, row in rotated_df.iterrows():
    deg = random.randint(-45 ,45)
    row["Image"] = rotate(row["Image"], deg)
    

aug_train_DF = pd.concat([train_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index = True)
del mirrored_df
del fliped_df
del rotated_df


In [15]:
mirrored_df  = val_DF.copy()
fliped_df    = val_DF.copy()
rotated_df   = val_DF.copy()

mirrored_df['Image'] = mirrored_df['Image'].apply(mirrors)

fliped_df['Image'] = fliped_df['Image'].apply(mirrors)

for index, row in rotated_df.iterrows():
    deg = random.randint(-45 ,45)
    row["Image"] = rotate(row["Image"], deg)

aug_val_DF = pd.concat([val_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index = True)
del mirrored_df
del fliped_df
del rotated_df


In [16]:
mirrored_df  = test_DF.copy()
fliped_df    = test_DF.copy()
rotated_df   = test_DF.copy()

mirrored_df['Image'] = mirrored_df['Image'].apply(mirrors)

fliped_df['Image'] = fliped_df['Image'].apply(mirrors)


for index, row in rotated_df.iterrows():
    deg = random.randint(-45 ,45)
    row["Image"] = rotate(row["Image"], deg)

aug_test_DF = pd.concat([test_DF, mirrored_df, fliped_df, rotated_df], axis=0, ignore_index = True)
del mirrored_df
del fliped_df
del rotated_df

In [17]:
#shuffle agumented dataframe
aug_train_DF = aug_train_DF.sample(frac=1).reset_index(drop=True) #Use if you want to reset index order
aug_val_DF = aug_val_DF.sample(frac=1).reset_index(drop=True) #Use if you want to reset index order
aug_test_DF = aug_test_DF.sample(frac=1).reset_index(drop=True) #Use if you want to reset index order

In [18]:
# aug_train_DF.to_pickle("aug_train_DF.pkl")
# val_DF.to_pickle("val_DF.pkl")
# test_DF.to_pickle("test_DF.pkl")

In [19]:
SIZE_X = 128 
SIZE_Y = 128

In [20]:
from skimage.transform import resize
def imgResize(img):
      return resize(img,(SIZE_X, SIZE_Y))

In [21]:
aug_train_DF["Image"] = aug_train_DF["Image"].apply(imgResize)
aug_val_DF["Image"] = aug_val_DF["Image"].apply(imgResize)
aug_test_DF["Image"] = aug_test_DF["Image"].apply(imgResize)

In [22]:
# aug_train_DF.to_pickle("aug_train_DF_{}.pkl".format(SIZE_X))
# aug_val_DF.to_pickle("aug_val_DF_{}.pkl".format(SIZE_X))
# aug_test_DF.to_pickle("aug_test_DF_{}.pkl".format(SIZE_X))

In [23]:
aug_train_DF.head()

Unnamed: 0,PID,Label,Image,File
0,103582,Pituitary,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1507.mat
1,Heatly_dataset_#998,Healthy,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",no44.jpg
2,110537,Pituitary,"[[0.0019607843137256016, 0.0019607843137256016...",1228.mat
3,101017,Pituitary,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1026.mat
4,HF1489,Glioma II,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",D:\Datasets\manifest-tXudPFfp43472957274060920...


In [24]:
print("aug_train_DF size: ", len(aug_train_DF)/(len(df)*4))
print("aug_val_DF size  : ", len(aug_val_DF)/(len(df)*4))
print("aug_test_DF size : ", len(aug_test_DF)/(len(df)*4))

aug_train_DF size:  0.750053728777133
aug_val_DF size  :  0.059746400171932085
aug_test_DF size :  0.10251450676982592


In [35]:
dest = r"./Sample_2"
count = 1
for index, row in test_DF.sample(20, random_state=seed).iterrows():
    img_name = "{}_{}.png".format(count, row["Label"])
    plt.imsave(os.path.join(dest, img_name), row["Image"], cmap=cm.gray)
    count +=1


'./Sample\\adfas.png'