In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import cv2
import shutil
import os
import seaborn as sns

from keras import layers
from tensorflow.keras import applications 
from keras.applications import MobileNetV2
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.optimizers import Adam

from sklearn.svm import SVC
from sklearn.preprocessing import label_binarize
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_curve, auc
from sklearn.multiclass import OneVsOneClassifier
from scipy import interp

from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score, confusion_matrix

from tqdm import tqdm

In [None]:
# Print the installed Kaggle CLI version as a sanity check before downloading the data.
!kaggle --version #make sure it's 1.5.8 if data is downloaded strangely

In [None]:
# Download the data of the `aptos2019-blindness-detection` competition through the Kaggle CLI.
!kaggle competitions download -c aptos2019-blindness-detection

In [None]:
# Load the training and validation tables from CSV files in the working directory.
train_df, val_df = (
    pd.read_csv(csv_name) for csv_name in ('train_data.csv', 'val_data.csv')
)

# Report the (rows, columns) shape of each table, one per line.
print(train_df.shape, val_df.shape, sep='\n')

In [None]:
# Class balance of the training labels. The counts have to be printed explicitly:
# a notebook cell only echoes its final expression, so a bare call here shows nothing.
print(train_df['diagnosis'].value_counts())

# Histogram of the same label column.
train_df['diagnosis'].hist()

# Preview the first rows and the overall (rows, columns) shape.
print(train_df.head())
print(train_df.shape)

In [None]:
from sklearn.utils import resample


def _balanced_draw(group, replace, n_samples=700, random_state=2):
  """Draw `n_samples` rows from one diagnosis class.

  Args:
    group: DataFrame holding the rows of a single class.
    replace: whether rows may be drawn more than once (oversampling).
    n_samples: number of rows to return.
    random_state: seed passed to `resample` so the draw is repeatable.

  Returns:
    DataFrame with exactly `n_samples` rows taken from `group`.
  """
  # Sampling without replacement cannot return more rows than the class holds
  # (resample raises ValueError), so fall back to drawing with replacement.
  if not replace and len(group) < n_samples:
    replace = True
  return resample(group, replace=replace, n_samples=n_samples, random_state=random_state)


X = train_df

# Bring every diagnosis grade (0-4) to 700 rows. Grades 0 and 2 are drawn
# without replacement; grades 1, 3 and 4 are drawn with replacement.
normal = _balanced_draw(X[X.diagnosis == 0], replace=False)
mild = _balanced_draw(X[X.diagnosis == 1], replace=True)
moderate = _balanced_draw(X[X.diagnosis == 2], replace=False)
severe = _balanced_draw(X[X.diagnosis == 3], replace=True)
pdr = _balanced_draw(X[X.diagnosis == 4], replace=True)

# combine
sampled = pd.concat([normal, mild, moderate, severe, pdr])

# Shuffle so the classes are interleaved; seeded so that a re-run of the
# notebook produces the same row order.
sampled_train_df = sampled.sample(frac=1, random_state=2).reset_index(drop=True)
print(sampled_train_df.shape)

In [None]:
# Directory containing the APTOS 2019 Kaggle images (the `<id_code>.png` files read below).
PATH_TO_DATA = ''  # must be set before the image-loading code runs

def resize_image(image):
  """Return `image` rescaled to 224x224 pixels with OpenCV's INTER_AREA interpolation."""
  target_size = (224, 224)
  resized = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
  return resized

# Normalize to range [0, 1]
def normalize_image(image):
  """Resize `image` and min-max scale its values to the range [0, 1].

  Args:
    image: image array accepted by `resize_image`.

  Returns:
    Floating-point array of the resized image in which the smallest value maps
    to 0 and the largest to 1. A constant image (max == min) maps to all zeros.
  """
  image = resize_image(image)
  lowest = np.min(image)
  value_range = np.max(image) - lowest
  if value_range == 0:
    # Flat image: dividing by the zero range would give 0/0 = NaN for every
    # pixel. Dividing by 1 keeps the result dtype and yields all zeros.
    value_range = 1
  return (image - lowest) / value_range

def _load_images(id_codes):
  """Read, convert to RGB and normalize one PNG image per id code.

  Args:
    id_codes: sized iterable of image ids; each one is read from
      `<PATH_TO_DATA>/<id>.png`.

  Returns:
    float32 array of shape (len(id_codes), 224, 224, 3).

  Raises:
    FileNotFoundError: if an image file cannot be read.
  """
  images = np.empty((len(id_codes), 224, 224, 3), dtype=np.float32)
  for idx, id_code in enumerate(tqdm(id_codes)):
    # os.path.join keeps an empty PATH_TO_DATA pointing at the working
    # directory instead of producing a path under the filesystem root.
    image_file = os.path.join(PATH_TO_DATA, f'{id_code}.png')
    bgr = cv2.imread(image_file)
    if bgr is None:
      # cv2.imread reports an unreadable file by returning None rather than
      # raising, which would otherwise surface as an opaque cvtColor error.
      raise FileNotFoundError(f'Could not read image: {image_file}')
    images[idx] = normalize_image(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
  return images


# Image tensors for the balanced training set and the validation set.
x_train = _load_images(sampled_train_df['id_code'])
x_val = _load_images(val_df['id_code'])

# Matching diagnosis labels.
y_train = sampled_train_df['diagnosis']
y_val = val_df['diagnosis']

print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)

In [None]:
BATCH_SIZE = 32

# Augmenting batch iterator over the training arrays: random zoom, rotation and
# horizontal/vertical flips, with uncovered pixels filled by the constant 0.
# Built in a single expression so `data_generator` only ever names the iterator.
data_generator = ImageDataGenerator(
    zoom_range=0.1,
    rotation_range=360,
    fill_mode='constant',
    cval=0.,
    horizontal_flip=True,
    vertical_flip=True,
).flow(x_train, y_train, batch_size=BATCH_SIZE, seed=2)

In [None]:
class Metrics(Callback):
  """Keras callback that scores the validation set with quadratic-weighted Cohen's kappa.

  After every epoch the model's predictions are converted to integer grades in
  0..4, the kappa against the validation labels is recorded in `val_kappas`,
  and the model is saved to 'model.h5' whenever the score equals the best seen
  so far.
  """

  def __init__(self, validation_data):
    """Store the validation set.

    Args:
      validation_data: sequence whose first two items are the validation
        inputs and the validation labels.
    """
    # Let the base class set up its own attributes first, then store ours.
    super().__init__()
    self.validation_data = validation_data

  def on_train_begin(self, logs=None):
    """Start a fresh kappa history for this training run."""
    self.val_kappas = []

  def on_epoch_end(self, epoch, logs=None):
    """Compute, record and print the validation kappa; save the model on a best score."""
    X_val, y_val = self.validation_data[:2]

    # Flatten so that a column of per-sample predictions becomes a 1-D vector.
    y_pred = np.asarray(self.model.predict(X_val)).ravel()
    # Round to the nearest grade before casting: a bare astype(int) truncates,
    # turning e.g. 2.9 into grade 2. Clip keeps the grades inside 0..4.
    y_pred = np.clip(np.rint(y_pred), 0, 4).astype(int)

    _val_kappa = cohen_kappa_score(
        y_val,
        y_pred,
        weights='quadratic'
    )

    self.val_kappas.append(_val_kappa)

    print(f"val_kappa: {_val_kappa:.4f}")

    # The newest score is already in the list, so equality with the maximum
    # means it is at least as good as every earlier epoch.
    if _val_kappa == max(self.val_kappas):
      print("Validation Kappa has improved. Saving model.")
      self.model.save('model.h5')
    
# Kappa-tracking callback bound to the validation images and labels.
kappa_metrics = Metrics(validation_data=[x_val, y_val])

In [None]:
# MobileNetV2 backbone without its classification top (alpha=1.3), with weights
# loaded from a local .h5 file in the working directory.
mobilenet = MobileNetV2(
    input_shape=(224,224,3),
    alpha=1.3,
    include_top=False,
    weights='mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.3_224_no_top.h5',
)

# Regression head: pooled backbone features -> two 256-unit layers -> one output.
# NOTE(review): the Dense layers have no activation, so the head is a purely
# linear map of the pooled features - confirm this is intended.
model = Sequential([
    mobilenet,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256),
    layers.Dense(256),
    layers.Dense(1),
])

# Trained as a regression on the diagnosis grade (mean squared error).
# NOTE(review): 'accuracy' on a continuous single-output target is presumably
# of limited meaning - verify before relying on it. Newer Keras releases spell
# the `lr` argument `learning_rate`.
model.compile(
    optimizer=Adam(lr=1e-4),
    loss='mse',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Number of batches in one pass over the training data. Keras expects an
# integer here, and plain division yields a float whenever the sample count is
# not a multiple of BATCH_SIZE. Rounding up keeps the final partial batch in
# each epoch.
steps_per_epoch = int(np.ceil(x_train.shape[0] / BATCH_SIZE))

# Train on augmented batches; the kappa callback evaluates the validation set
# after every epoch.
history = model.fit_generator(
    data_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=100,
    validation_data=(x_val, y_val),
    callbacks=[kappa_metrics],
)