In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.

import os

# Any results you write to the current directory are saved as output.

In [None]:
# Show what is available under ../input. Note: the densenet-keras dataset is added
# manually so the DenseNet weights can be used for transfer learning.
print(os.listdir("../input"))

In [None]:
import tensorflow as tf
from keras import layers
from sklearn.model_selection import train_test_split
from keras.callbacks import Callback
from sklearn.metrics import cohen_kappa_score, accuracy_score
from keras.callbacks import EarlyStopping

import cv2
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import DenseNet121
from keras.layers import (Activation, Dropout, Dense,
                          BatchNormalization, Input, GlobalAveragePooling2D)
from keras.models import Model
from keras import metrics
from keras.optimizers import Adam 
from keras.optimizers import Nadam
#from keras import regularizers, optimizers #doubtfull

from tqdm import tqdm #shows progress bar of the operation

import matplotlib.pyplot as plt
%matplotlib inline

####to be deleted####
# from keras.layers import Conv2D
# from keras.layers import MaxPooling2D
# from keras.layers import Flatten
# from keras.layers import Dense
# from keras.layers import GaussianDropout
# from keras import regularizers, optimizers
# from keras.regularizers import l1,l2


#  #this is new for me
# from PIL import Image #this is new for me


# import scipy #this is new for me






# from keras.models import Sequential, load_model

# from keras.callbacks import ModelCheckpoint
# from keras import metrics
# from keras.optimizers import Adam 
# from keras.optimizers import Nadam 

# from keras import backend as K
# import keras
# from keras.models import Model



#print("tensorflow_version=",tf.__version__)

### Set random seed for repeatability

In [None]:
# Fix the NumPy and TensorFlow global seeds so repeated runs are comparable.
np.random.seed(2019)
tf.set_random_seed(2019)

## Loading and EDA

In [None]:
# Read the train/test CSV tables and report their shapes.
train_df = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
test_df  = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')
print("train_df shape:", train_df.shape)
print("test_df shape:",test_df.shape)
# Last expression of the cell: preview the first training rows.
train_df.head()

In [None]:
# Look at the distribution of the 'diagnosis' column: a histogram plus per-value counts.
train_df['diagnosis'].hist()
train_df['diagnosis'].value_counts()

## Display sample images

In [None]:
def display_samples(df, columns=4, rows=3,
                    image_dir='../input/aptos2019-blindness-detection/train_images'):
    """Plot a grid of images, each titled with its diagnosis label.

    Args:
        df: DataFrame with an 'id_code' column (image file stem) and a
            'diagnosis' column (label shown as the subplot title).
        columns: number of grid columns.
        rows: number of grid rows.
        image_dir: directory holding the '<id_code>.png' files.

    Raises:
        OSError: if an image file cannot be read.
    """
    fig = plt.figure(figsize=(5*columns, 4*rows))

    # Rows are taken by position (iloc), so a filtered or shuffled frame works too,
    # and the grid is capped at len(df) so a short frame does not raise.
    n_images = min(columns*rows, len(df))
    for i in range(n_images):
        id_code = df['id_code'].iloc[i]
        diagnosis = df['diagnosis'].iloc[i]
        image_path = f'{image_dir}/{id_code}.png'

        img = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when the file cannot be read.
        if img is None:
            raise OSError(f'Could not read image: {image_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB
        print('size = ',img.shape)

        fig.add_subplot(rows, columns, i+1)
        plt.title(diagnosis)
        plt.imshow(img)

    plt.tight_layout()

# Preview a 2x2 grid of training images titled with their diagnosis.
display_samples(train_df,2,2)

## Resize Images
Resize to 312x312, then create a single numpy array to hold the data 

In [None]:
# N: number of training rows; IMG_SIZE: side length in pixels of the resized images.
N = train_df.shape[0]
IMG_SIZE = 312
# Preallocate one uint8 array of shape (N, IMG_SIZE, IMG_SIZE, 3) to hold the resized images.
x_train = np.zeros((N, IMG_SIZE, IMG_SIZE,3), dtype = np.uint8)

def resize_IMG(image_path, desired_size = IMG_SIZE):
    """Read an image from disk, convert it to RGB and resize it to a square.

    Args:
        image_path: path of the image file to read.
        desired_size: side length in pixels of the returned image
            (defaults to IMG_SIZE).

    Returns:
        The resized RGB image as returned by cv2.resize, with height and width
        both equal to desired_size.

    Raises:
        OSError: if cv2.imread cannot read the file.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None for a missing or unreadable file instead of raising;
    # fail here with the offending path rather than inside cvtColor.
    if img is None:
        raise OSError(f'Could not read image: {image_path}')

    im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images in BGR order
    return cv2.resize(im, (desired_size, desired_size))


## resize the train set images and store in x_train array

In [None]:
# Resize each training image and store it in x_train at the row's position;
# tqdm displays a progress bar over the id_code column.
for i, image_id in enumerate(tqdm(train_df['id_code'])):
    x_train[i, :, :, :] = resize_IMG(
    f'../input/aptos2019-blindness-detection/train_images/{image_id}.png')

In [None]:
# One-hot encode the diagnosis values (classes 0,1,2,3,4) into a uint8 matrix with 5 columns.
y_train = tf.keras.utils.to_categorical(
    train_df['diagnosis'],
    num_classes=5,
    dtype='uint8'
)
y_train[0:5] # preview the first 5 encoded rows

In [None]:
# Hold out 20% of the samples for validation, stratified on the labels,
# with a fixed random_state so the split is repeatable.
train_x, valid_x, train_y, valid_y = train_test_split(x_train, y_train, test_size=0.20,
                                                      stratify=y_train, random_state=8)

# Augmenting dataset to generate more training samples to address class imbalance

In [None]:
# Augment the training split with random transformations to get more varied
# training samples and help reduce overfitting.

BATCH_SIZE = 32
datagen = ImageDataGenerator(
    zoom_range = 0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    # Brightness values are multiplicative factors (1.0 = unchanged, 0.0 = black),
    # so the range must stay positive. The previous (-3.0, 3.0) range drew negative
    # factors for about half of the samples, which turns the image black.
    brightness_range=(0.8, 1.2),
    channel_shift_range=5.0
    )

# Flow from the training split (train_x / train_y) rather than the full x_train / y_train,
# so the images held out by train_test_split are never used for training.
data_generator = datagen.flow(train_x, train_y, batch_size=BATCH_SIZE, seed=2019)

# Optional - only for sanity check that preprocessing is done ok

In [None]:
# Sanity-check helper: show images next to their labels after preprocessing and splitting.
def display_samples_preprocess(df1,df2, columns=4, rows=3):
    """Draw the first columns*rows entries of df1 in a grid.

    Args:
        df1: indexable collection of images; df1[i] is passed to plt.imshow.
        df2: indexable collection of labels; df2[i] is printed in the title.
        columns: number of grid columns.
        rows: number of grid rows.
    """
    figure = plt.figure(figsize=(5*columns, 4*rows))

    n_cells = columns*rows
    for idx in range(n_cells):
        sample = df1[idx]
        figure.add_subplot(rows, columns, idx+1)
        caption = 'train_x{}, train_y:{}'.format(idx, df2[idx])
        plt.title(caption)
        plt.imshow(sample)

    plt.tight_layout()



In [None]:
# Sanity check: show a 2x2 grid of training-split images with their labels,
# then display the shape of the training split.
display_samples_preprocess(train_x,train_y,2,2)
train_x.shape

In [None]:
# Make sure the validation split also looks ok (5 columns x 2 rows),
# then display its shape.
display_samples_preprocess(valid_x,valid_y,5,2)
valid_x.shape

In [None]:
# Callback that tracks the validation kappa score and saves the model when it is at its best.
class Metrics(Callback):
    """Compute the quadratic-weighted Cohen's kappa on the validation data after each epoch.

    The model is saved to '../working/model.h5' whenever the epoch's kappa is at
    least as high as every kappa recorded since the current training run began.

    Attributes:
        val_kappas: per-epoch validation kappa scores; reset in on_train_begin.
    """

    def on_train_begin(self, logs=None):
        """Start each training run with an empty kappa history."""
        self.val_kappas = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation data, record the kappa and save the model if it is the best."""
        # The first two entries of validation_data are the inputs and the targets.
        X_val, y_val = self.validation_data[:2]
        y_pred = self.model.predict(X_val)

        # Reduce each target / prediction row to the index of its largest entry
        # in one vectorised call instead of a per-row loop with a progress bar.
        n_classes = y_pred.shape[1]
        y_val = np.argmax(y_val, axis=1)
        y_pred = np.argmax(y_pred, axis=1)

        # Passing the full label set keeps the quadratic weights tied to the real class
        # distance even when some class appears in neither y_val nor y_pred.
        _val_kappa = cohen_kappa_score(
            y_val,
            y_pred,
            labels=np.arange(n_classes),
            weights='quadratic'
        )

        self.val_kappas.append(_val_kappa)

        print(f"val_kappa: {_val_kappa:.4f}")

        # The current score is already in the list, so this is true when it ties or beats the best.
        if _val_kappa >= max(self.val_kappas):
            print("Validation Kappa has improved. Saving model.")
            self.model.save('../working/model.h5')

In [None]:
# Define the two callbacks that will be passed to training.
# early: stop when "val_accuracy" (mode "max") has not improved for 20 epochs.
early =  EarlyStopping(monitor = "val_accuracy",
                      mode = "max",
                      patience = 20)

# kappa_metrics: instance of the Metrics callback (validation kappa + model saving).
kappa_metrics = Metrics()

In [None]:
# Define model:

def create_model(input_shape, n_out):
    """Build a DenseNet121 backbone followed by a small softmax classification head.

    Args:
        input_shape: shape of one input image, passed to keras Input.
        n_out: number of units in the final softmax layer.

    Returns:
        A keras Model mapping the input tensor to the 'final_output' layer.
    """
    inputs = Input(shape = input_shape)

    # Build the backbone without weights, then load them from the attached dataset file.
    backbone = DenseNet121(input_tensor = inputs,
                           include_top = False,
                           weights = None)
    backbone.load_weights("../input/densenet-keras/DenseNet-BC-121-32-no-top.h5")

    # Head: global average pooling -> Dense(1024, relu) -> Dropout(0.5) -> softmax.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(1024, activation = 'relu')(features)
    features = Dropout(0.5)(features)
    probabilities = Dense(n_out, activation = 'softmax', name = 'final_output')(features)

    return Model(inputs, probabilities)



In [None]:
# Build the model for IMG_SIZE x IMG_SIZE 3-channel inputs and NUM_CLASS outputs,
# then print its layer summary.

NUM_CLASS = 5

model = create_model(
        input_shape= (IMG_SIZE, IMG_SIZE, 3),
        n_out = NUM_CLASS
                    )
model.summary()

In [None]:
# Warm-up training: freeze the pretrained backbone and train only the newly added head.

for layer in model.layers:
    layer.trainable = False

# The head built in create_model ends with Dense(1024) -> Dropout -> final_output,
# i.e. the last three layers. Unfreezing only the last two would leave the randomly
# initialised Dense(1024) frozen for the whole warm-up.
for layer in model.layers[-3:]:
    layer.trainable = True

model.compile(
    loss = 'categorical_crossentropy',
    optimizer = Nadam(0.0001),
    metrics =['accuracy']
)

# len(data_generator) is the integer number of batches in one pass over the generator's
# data, which avoids a fractional step count. validation_steps is not passed: the
# validation data is given as in-memory arrays, and np.int is gone from recent NumPy.
history = model.fit_generator(data_generator,
                              steps_per_epoch=len(data_generator),
                              epochs = 2, verbose =1,validation_data = (valid_x, valid_y),
                              callbacks = [kappa_metrics, early]
                               )

In [None]:
def plot_history(history):
    """Plot per-epoch accuracy and loss curves for training and validation.

    Args:
        history: object exposing `.history` (dict of per-epoch metric lists with
            'accuracy', 'val_accuracy', 'loss' and 'val_loss') and `.epoch`.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    # One figure per metric: (y-axis label, train column, train legend, val column, val legend)
    panels = (
        ('Accuracy', 'accuracy', 'Train accuracy', 'val_accuracy', 'Val accuracy'),
        ('Loss', 'loss', 'Train loss', 'val_loss', 'Val loss'),
    )

    for y_label, train_col, train_legend, val_col, val_legend in panels:
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.plot(hist['epoch'], hist[train_col], label=train_legend)
        plt.plot(hist['epoch'], hist[val_col], label=val_legend)
        plt.legend()


In [None]:
# Fine-tuning: unfreeze every layer and train the whole network.
LEARNING_RATE = 0.0001  # single source for the optimizer and the log line below

for layer in model.layers:
    layer.trainable = True

# Recompile so the changed trainable flags take effect.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = Nadam(LEARNING_RATE),
    metrics =['accuracy'])

# len(data_generator) is the integer number of batches in one pass over the generator's
# data, which avoids a fractional step count. validation_steps is not passed: the
# validation data is given as in-memory arrays, and np.int is gone from recent NumPy.
history = model.fit_generator(data_generator,
                              steps_per_epoch=len(data_generator),
                              epochs = 50, verbose =1,
                              validation_data = (valid_x, valid_y),
                              callbacks = [kappa_metrics, early]
                               )

print('plotting history for lr=',LEARNING_RATE)
plot_history(history)


In [None]:
# Load the weights from the file written by the kappa callback, i.e. the most
# recently saved (best validation kappa) model.
model.load_weights('../working/model.h5')

## Get a sense of model performance using validation set

In [None]:
# Run the model on the validation split; each output row comes from the softmax layer.
score_predict = model.predict(valid_x)

In [None]:
# Flatten y values to get only the diagnostic id of each row.
def flatten(var):
    """Return the argmax position of every row as a list of strings.

    Args:
        var: iterable of rows (one-hot labels or class scores). Each row can be
            any array-like; np.argmax is taken over the flattened row.

    Returns:
        list of str: for each row, the index of its largest entry.
    """
    # The per-row work is trivial, so a plain comprehension replaces the manual
    # append loop and its progress bar.
    return [str(np.argmax(row)) for row in var]

# Reduce the validation predictions to class-id strings and preview the first five.
validation = flatten(score_predict)
print(validation[0:5])

# Do the same for the one-hot validation targets.
valid_y_flat = flatten(valid_y)

print(valid_y_flat[0:5])


In [None]:
# Eyeball predicted vs. true labels for validation items 20-49.
print(validation[20:50])
print(valid_y_flat[20:50])

In [None]:
def cohen_kappa(true, pred):
    """Return the quadratic-weighted Cohen's kappa between true and predicted labels."""
    return cohen_kappa_score(true, pred, weights='quadratic')
# Quadratic-weighted kappa of the validation predictions against the true labels.
score = cohen_kappa(valid_y_flat, validation)
print(score)

In [None]:
# Test-set counterpart of the training buffer: N1 is the number of test rows, and
# x_test is a uint8 array of shape (N1, IMG_SIZE, IMG_SIZE, 3).
N1 = test_df.shape[0]
x_test = np.zeros((N1, IMG_SIZE, IMG_SIZE, 3), dtype = np.uint8)


In [None]:
## Make predictions on the test set, one image at a time.
# Each image is resized, scored and then dropped, so the full resized test set is
# never held in memory (storing it was reported to cause memory overload).
predicted = []
model.load_weights('../working/model.h5')

for image_id in tqdm(test_df['id_code']):
    test_image = resize_IMG(
        f'../input/aptos2019-blindness-detection/test_images/{image_id}.png')
    # model.predict works on batches: add a leading axis of length 1.
    predicted_temp = model.predict(test_image[np.newaxis, ...])
    predicted.append(predicted_temp)

In [None]:
# Flatten the predictions to get one predicted label (as a string) per test image.
predicted_label = flatten(predicted)

In [None]:
# Generate the submission file: start from the sample submission, overwrite its
# 'diagnosis' column with the predicted labels, and write it without the index.
submit = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
submit['diagnosis'] = predicted_label
submit.to_csv('submission.csv', index=False)
submit.head()

In [None]:
# Count how many test images were assigned to each predicted class.
submit['diagnosis'].value_counts()