In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [1]:
%tensorflow_version 2.x

In [2]:
import json
import math
import os

import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications import DenseNet121
#from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
import tensorflow as tf
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tqdm import tqdm

%matplotlib inline

Using TensorFlow backend.


In [3]:
from numpy import load
x_train = load('/content/gdrive/My Drive/APTOS1/x_train.npy')
x_test = load('/content/gdrive/My Drive/APTOS1/x_test.npy')

In [4]:
train_df = pd.read_csv('/content/gdrive/My Drive/APTOS1/train.csv')
test_df = pd.read_csv('/content/gdrive/My Drive/APTOS1/test.csv')

In [5]:
train_df.head()

Unnamed: 0,id_code,diagnosis
0,000c1434d8d7,2
1,001639a390f0,4
2,0024cdab0c1e,1
3,002c21358ce6,0
4,005b95c28852,0


In [6]:
y_train =train_df['diagnosis'].values

In [7]:
y_train.shape

(3662,)

In [8]:
def crop_image_from_gray(img, tol=7):
    """
    Applies masks to the orignal image and 
    returns the a preprocessed image with 
    3 channels
    
    :param img: A NumPy Array that will be cropped
    :param tol: The tolerance used for masking
    
    :return: A NumPy array containing the cropped image
    """
    # If for some reason we only have two channels
    if img.ndim == 2:
        mask = img > tol
        return img[np.ix_(mask.any(1),mask.any(0))]
    # If we have a normal RGB images
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img > tol
        
        check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
        if (check_shape == 0): # image is too dark so that we crop out everything,
            return img # return original image
        else:
            img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
            img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
            img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
            img = np.stack([img1,img2,img3],axis=-1)
        return img

In [9]:
def preprocess_image(image, sigmaX=10):
    """
    The whole preprocessing pipeline:
    1. Read in image
    2. Apply masks
    3. Resize image to desired size
    4. Add Gaussian noise to increase Robustness
    
    :param img: A NumPy Array that will be cropped
    :param sigmaX: Value used for add GaussianBlur to the image
    
    :return: A NumPy array containing the preprocessed image
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (224, 224))
    image = cv2.addWeighted (image,4, cv2.GaussianBlur(image, (0,0) ,sigmaX), -4, 128)
    return image

In [12]:
BATCH_SIZE = 64


train_datagen = ImageDataGenerator(
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   preprocessing_function= preprocess_image, 
                                   rescale=1 / 255.)

test_datagen = ImageDataGenerator( horizontal_flip=True,
                                   vertical_flip=True,
                                   preprocessing_function= preprocess_image, 
                                   rescale=1 / 255.)

tdata_generator = train_datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, seed=2020)
vdata_generator = test_datagen.flow(x_val, y_val, batch_size=BATCH_SIZE, seed=2020)

In [13]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Activation

In [20]:
model1 = tf.keras.Sequential()

model1.add(Conv2D(32, (3, 3), padding='same', input_shape=(224,224,3)))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))

model1.add(Conv2D(64, (3, 3), padding='same'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))

model1.add(Conv2D(128, (3, 3), padding='same'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))

model1.add(Conv2D(256, (3, 3), padding='same'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))

model1.add(Conv2D(512, (3, 3), padding='same'))
model1.add(Activation('relu'))
model1.add(MaxPooling2D(pool_size=(2, 2)))

model1.add(Flatten())
model1.add(Dense(2024))
model1.add(Activation('relu'))
model1.add(Dropout(0.5))

model1.add(Flatten())
model1.add(Dense(512))
model1.add(Activation('relu'))
model1.add(Dropout(0.5))

model1.add(Dense(50, activation='elu'))
model1.add(Dense(1, activation='linear'))

model1.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 224, 224, 32)      896       
_________________________________________________________________
activation_14 (Activation)   (None, 224, 224, 32)      0         
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 112, 112, 64)      18496     
_________________________________________________________________
activation_15 (Activation)   (None, 112, 112, 64)      0         
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 56, 56, 128)      

In [28]:
densenet = tf.keras.applications.DenseNet121(
    weights='imagenet',
    include_top=False,
    input_shape=(224,224,3)
)

In [29]:
model1 = tf.keras.Sequential()
model1.add(densenet)
model1.add(tf.keras.layers.GlobalAveragePooling2D())
model1.add(tf.keras.layers.Dropout(0.5))

model1.add(tf.keras.layers.Dense(512, activation='relu'))
model1.add(tf.keras.layers.Dropout(0.5))

model1.add(tf.keras.layers.Dense(256, activation='relu'))
model1.add(tf.keras.layers.Dropout(0.5))

model1.add(tf.keras.layers.Dense(5, activation='elu'))
model1.add(tf.keras.layers.Dense(1, activation='linear'))


In [30]:
model1.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Model)          (None, 7, 7, 1024)        7037504   
_________________________________________________________________
global_average_pooling2d_2 ( (None, 1024)              0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_6 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)              

In [21]:
model1.compile(loss='mse',optimizer= tf.keras.optimizers.Adam(0.001),metrics=['mse','accuracy'])

In [11]:
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, 
    test_size=0.15, 
    random_state=2020
)

In [21]:
class QWKEvaluation(Callback):
    def __init__(self, validation_data, batch_size=32, interval=1):
        super(Callback, self).__init__()

        self.interval = interval
        self.batch_size = batch_size
        self.y_vals = validation_data
        self.history = []

    def on_epoch_end(self, epoch, logs={}):
        X_val = self.y_vals[0]
        y_val = self.y_vals[1]

        if epoch % self.interval == 0:
            y_pred = self.model.predict(X_val,verbose = 1)

            y_pred = np.rint(y_pred).astype(np.uint8).clip(0, 4)
            
            score = cohen_kappa_score(y_val,
                                      y_pred,
                                      weights='quadratic')
            print("\n epoch: %d - QWK_score: %.6f \n" % (epoch+1, score))
            self.history.append(score)
            if score >= max(self.history):
                print('saving checkpoint: ', score)
                self.model.save('densenet_bestqwk.h5')

qwk = QWKEvaluation(validation_data=(x_val, y_val),
                    batch_size=128, interval=1)

In [16]:
x_val.shape

(550, 224, 224, 3)

In [17]:
x_train.shape

(3112, 224, 224, 3)

In [20]:
x_val1 = np.zeros((550, 224, 224, 3),dtype=float)
for i in range(len(x_val)):
  x_val1[i]=preprocess_image(x_val[i])

In [21]:
x_train1 = np.zeros((3112, 224, 224, 3),dtype=float)
for i in range(len(x_train)):
  x_train1[i]=preprocess_image(x_train[i])

In [32]:
x_val1.shape

(550, 224, 224, 3)

In [33]:
x_train1.shape

(3112, 224, 224, 3)

In [22]:
model1.fit(x=x_train/255., y=y_train,batch_size=128, epochs=50, validation_data=(x_val/255.,y_val),callbacks=[])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50

KeyboardInterrupt: ignored

In [32]:
model1.predict(x_train[1:20])

array([[3.0088634 ],
       [2.0028012 ],
       [0.02011839],
       [2.656844  ],
       [0.02011839],
       [0.02011839],
       [2.6272943 ],
       [1.915221  ],
       [0.02011839],
       [2.4760973 ],
       [1.9873918 ],
       [0.02011839],
       [0.01910734],
       [1.9853209 ],
       [2.018503  ],
       [0.02011839],
       [0.02011839],
       [1.9971044 ],
       [0.02011839]], dtype=float32)