In [1]:
# Output file names for the arrays this notebook writes with np.save.
# Per-fold metric arrays (they are still empty when this notebook saves them).
path_to_accuracy = 'accuracies_our_model5.npy'
path_to_f1score = 'f1score_our_model5.npy'
# Row indices of the test / train samples, one row per cross-validation fold.
path_to_test_indices = 'test_indices.npy'
path_to_train_indices = 'train_indices.npy'
# Image array and its one-hot encoded labels.
path_to_X = 'X.npy'
path_to_Y = 'Y.npy'

In [2]:
# Environment setup: remove enum34, then install the extra packages this
# notebook uses (gdown downloads the dataset archive in the next cell, Pillow
# is imported as PIL, scikit-learn provides shuffle and StratifiedKFold).
# NOTE(review): enum34 is presumably removed because it clashes with the
# standard-library enum module - confirm.
# NOTE(review): no versions are pinned, so a fresh run may install different releases.
!pip uninstall -y enum34
!pip install gdown
!pip install pillow
!pip install scikit-learn

Uninstalling enum34-1.1.6:
  Successfully uninstalled enum34-1.1.6
Collecting gdown
  Downloading https://files.pythonhosted.org/packages/50/21/92c3cfe56f5c0647145c4b0083d0733dd4890a057eb100a8eeddf949ffe9/gdown-3.12.2.tar.gz
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
[?25hCollecting tqdm (from gdown)
[?25l  Downloading https://files.pythonhosted.org/packages/8a/54/115f0c28a61d56674c3a5e05c46d6c3523ad196e1dcd3e2d8b119026df36/tqdm-4.54.1-py2.py3-none-any.whl (69kB)
[K     |████████████████████████████████| 71kB 12.4MB/s eta 0:00:01
[?25hCollecting requests[socks] (from gdown)
[?25l  Downloading https://files.pythonhosted.org/packages/39/fc/f91eac5a39a65f75a7adb58eac7fa78871ea9872283fb9c44e6545998134/requests-2.25.0-py2.py3-none-any.whl (61kB)
[K     |████████████████████████████████| 61kB 17.5MB/s eta 0:00:01
[?25hCollecting filelock (from gdown)
  Downloading https://f

In [3]:
# Download the dataset archive from Google Drive by file id, then extract it
# quietly (-q) into the current working directory.
# NOTE(review): the archive is expected to unpack into the 'lymphoma-kaggle'
# folder that rootdir points at - confirm.
!gdown --id 173oK0fD29D-opBV6v-GvBBG4TE4FF-mB
!unzip -q lymphoma-kaggle.zip

Downloading...
From: https://drive.google.com/uc?id=173oK0fD29D-opBV6v-GvBBG4TE4FF-mB
To: /notebooks/lymphoma-kaggle.zip
1.44GB [00:34, 42.0MB/s]


In [4]:
# Sanity check: this cell should print the NVIDIA driver / graphics card
# status table for the machine the notebook runs on.
!nvidia-smi

Fri Dec 11 08:48:14 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 450.36.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro P5000        On   | 00000000:00:05.0 Off |                  Off |
| 26%   22C    P8     6W / 180W |      1MiB / 16278MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [5]:
# Verify that TensorFlow can see a GPU.
# Expected output on a correctly configured machine:
#   Default GPU Device: /device:GPU:0
import tensorflow as tf

# Query the device name once; an empty string means no GPU was found.
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("Please install GPU version of TF")
else:
    print('Default GPU Device: {}'.format(gpu_name))

Default GPU Device: /device:GPU:0


In [6]:
import numpy as np
import os
from tensorflow.keras.preprocessing import image
from PIL import Image
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle

In [7]:
# Target number of images per class: get_req_dataset tops each class up to
# 'total' images with augmented copies.
total = 150
# Directory that getDataFromFile walks recursively to find the input images.
rootdir = 'lymphoma-kaggle'

In [8]:
def get_req_dataset(X, target=None, seed=None):
    """Top up a list of images with augmented copies until it holds ``target`` items.

    Augmented images are produced by an ImageDataGenerator that applies a
    random brightness change (factor range 0.5-1.5) and random horizontal /
    vertical flips. They are always derived from the images present in ``X``
    when the function is called, never from previously generated copies.

    Parameters
    ----------
    X : list
        Images of one class. The list is extended IN PLACE and also returned.
    target : int, optional
        Desired number of images. Defaults to the module-level ``total``.
    seed : int, optional
        Seed forwarded to ``ImageDataGenerator.flow`` so the augmentation can
        be made reproducible. ``None`` (default) keeps it unseeded.

    Returns
    -------
    list
        The same list object ``X``, holding at least ``target`` images.

    Raises
    ------
    ValueError
        If ``X`` is empty while augmented images are required; there is
        nothing to augment from in that case.
    """
    if target is None:
        target = total

    req = target - len(X)
    if req <= 0:
        # Already enough images: skip the array copy and generator setup.
        return X
    if len(X) == 0:
        raise ValueError(
            'Cannot create {} augmented images from an empty image list'.format(req)
        )

    # Snapshot of the originals; the generator samples only from this array.
    X_array = np.array(X)
    datagen = ImageDataGenerator(brightness_range=[0.5,1.5], horizontal_flip=True, vertical_flip=True)
    aug_iter = datagen.flow(X_array, batch_size=1, seed=seed)

    for _ in range(req):
        # Each batch has one image; store it as uint8 like the original code did.
        X.append(next(aug_iter)[0].astype('uint8'))

    return X

In [9]:
def getDataFromFile():
    """Load the images of the three classes found under ``rootdir``.

    The class of an image is the name of the first directory level beneath
    ``rootdir`` ('CLL', 'FL' or 'MCL'). Files located anywhere else are
    skipped without being opened. Directories and files are visited in sorted
    order so that the resulting sample order does not depend on the file
    system. Each class list is then topped up by ``get_req_dataset``.

    Returns
    -------
    tuple of list
        ``(X_CLL, X_FL, X_MCL)``, one list of image arrays per class.
    """
    images_by_class = {'CLL': [], 'FL': [], 'MCL': []}

    for subdir, dirs, files in os.walk(rootdir):
        # os.walk honours in-place edits of 'dirs', so sorting here fixes the
        # traversal order of the sub-directories.
        dirs.sort()

        # Derive the class from the path relative to rootdir using the
        # platform separator, instead of splitting the full path on '/'.
        className = os.path.relpath(subdir, rootdir).split(os.sep)[0]
        if className not in images_by_class:
            # Not inside a class folder: do not try to load these files.
            continue

        for file in sorted(files):
            curr_img_path = os.path.normpath(os.path.join(subdir, file))
            img = image.img_to_array(image.load_img(curr_img_path))
            images_by_class[className].append(img)

    X_CLL = get_req_dataset(images_by_class['CLL'])
    X_FL = get_req_dataset(images_by_class['FL'])
    X_MCL = get_req_dataset(images_by_class['MCL'])

    return X_CLL, X_FL, X_MCL

In [10]:
def getXY():
    """Assemble one sample array and one label array from the per-class lists.

    Samples are concatenated class by class (CLL, FL, MCL) and every sample
    receives the integer label of its class (CLL=0, FL=1, MCL=2).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)``: the stacked samples and their integer labels.
    """
    X_CLL, X_FL, X_MCL = getDataFromFile()

    classes = {'CLL': 0, 'FL': 1, 'MCL': 2}
    grouped = (('CLL', X_CLL), ('FL', X_FL), ('MCL', X_MCL))

    samples = []
    labels = []
    for class_name, class_images in grouped:
        samples.extend(class_images)
        # One label entry per image of this class.
        labels.extend([classes[class_name]] * len(class_images))

    return np.array(samples), np.array(labels)

In [11]:
def getFinalDataset():
    """Return the dataset with samples and labels shuffled in unison.

    getXY() delivers the samples grouped by class (CLL, then FL, then MCL);
    they are shuffled here with random_state=0 to get a random order.
    """
    features, labels = getXY()
    features, labels = shuffle(features, labels, random_state=0)
    return features, labels

In [12]:
# Build the dataset and print both array shapes as a quick sanity check.
X, Y = getFinalDataset()
print(X.shape, Y.shape, sep='\n')

(450, 1040, 1388, 3)
(450,)


In [13]:
from sklearn.model_selection import StratifiedKFold

# Stratified 5-fold cross-validation with a fixed seed; the index arrays of
# every fold are collected so they can be saved and reused later.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Fixed: a stray 's' after the list literal made this line a syntax error.
train_indices = []
test_indices = []

for train_index, test_index in kfold.split(X, Y):
    train_indices.append(train_index)
    test_indices.append(test_index)
    

In [14]:
# Containers for the per-fold metrics. They are still empty when this notebook
# saves them below.
# NOTE(review): presumably filled in by a later training run - confirm.
accuracies = []
f1scores = []

In [15]:
# Turn the fold index lists and the metric lists into NumPy arrays.
train_indices, test_indices, accuracies, f1scores = (
    np.array(seq) for seq in (train_indices, test_indices, accuracies, f1scores)
)

# Sanity check: shapes of the index arrays, then shape and content of each metric.
for fold_array in (train_indices, test_indices):
    print(fold_array.shape)
for metric_array in (accuracies, f1scores):
    print(metric_array.shape)
    print(metric_array)

(5, 360)
(5, 90)
(0,)
[]
(0,)
[]


In [16]:
# Persist the fold indices, the metric arrays and the image array.
np.save(path_to_train_indices, train_indices)
np.save(path_to_test_indices, test_indices)
np.save(path_to_accuracy, accuracies)
np.save(path_to_f1score, f1scores)
np.save(path_to_X, X)

# One-hot encode the integer labels before saving them. The ndim guard keeps
# this cell safe to re-run: without it, a second run would one-hot encode the
# already encoded labels and save an array with the wrong shape.
if Y.ndim == 1:
    Y = to_categorical(Y)
np.save(path_to_Y, Y)

In [17]:
# Round-trip check: reload both saved arrays and print their shapes.
ss = np.load(path_to_X)
tt = np.load(path_to_Y)
print(ss.shape, tt.shape, sep='\n')

(450, 1040, 1388, 3)
(450, 3)
