In [135]:
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.metrics import mean_squared_error
import keras
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten,Conv2D, ZeroPadding2D, Convolution2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
from tensorflow.keras.layers import MaxPooling2D
from keras.layers import Input, Concatenate, Conv2D, Flatten, Dense, BatchNormalization
from keras.models import Model
import tensorflow as tf
import cv2
import pandas as pd
import numpy as np
import os
from tensorflow.keras import activations


In [136]:
# Side length (pixels) of the square RGB images fed to the network.
IMG_SIZE = 256
# Single image input; the name 'Image' is the key used when feeding dict inputs.
image_input = Input((IMG_SIZE, IMG_SIZE, 3), name='Image')

# Three Conv -> MaxPool -> BatchNorm stages. Every conv uses stride 2 and
# 'valid' padding, so spatial resolution shrinks quickly
# (256 -> 123 -> 61 -> 29 -> 14 ... per the model summary).
# NOTE(review): the first conv has only 3 filters -- confirm this is intentional.
x = Conv2D(filters=3, kernel_size=(11, 11), strides=(2, 2), padding='valid', activation='relu')(image_input)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = BatchNormalization()(x)
x = Conv2D(filters=96, kernel_size=(5, 5), strides=(2, 2), padding='valid', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = BatchNormalization()(x)
x = Conv2D(filters=192, kernel_size=(3, 3), strides=(2, 2), padding='valid', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
resOut = BatchNormalization()(x)

# Classification head: flatten -> 64-unit ReLU layer -> single sigmoid unit.
flat_layer = Flatten()(resOut)
dense2 = Dense(64, activation='relu')(flat_layer)
output = Dense(1, activation='sigmoid')(dense2)

# Define the model with a single input (the image); no numerical-feature
# input is wired into this architecture.
model = Model(inputs=[image_input], outputs=output)

# Binary classification setup: Adam optimiser, binary cross-entropy loss,
# tracking accuracy and AUC.
model.compile(
    optimizer="adam",
    loss='binary_crossentropy',
    metrics=['acc', tf.keras.metrics.AUC()]
)


In [137]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Image (InputLayer)           [(None, 256, 256, 3)]     0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 123, 123, 3)       1092      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 61, 61, 3)         0         
_________________________________________________________________
batch_normalization_15 (Batc (None, 61, 61, 3)         12        
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 29, 29, 96)        7296      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 14, 14, 96)        0         
_________________________________________________________________
batch_normalization_16 (Batc (None, 14, 14, 96)        384 

In [138]:
from tensorflow.keras.utils import plot_model

# plot_model(model)

## Dataframe

In [139]:
# Previous (Colab / Google Drive) locations, kept for reference:
# ROOT_DIR = '/content/drive/My Drive'
# dataset = os.path.join(ROOT_DIR, 'ForKaggle')
# Kaggle input paths: directory of cropped images and the CSV that indexes them.
# NOTE(review): `cropped` is not used in the cells visible here -- confirm it is still needed.
cropped = '../input/cropped-data/Cropped_Data'
df_path = '../input/cropped-data/origa_g1020_refuge.csv'
# One row per image: path, source dataset, CDR / eccentricity features and label.
data = pd.read_csv(df_path)
data.head()

Unnamed: 0,Image,Source,Cropped,CDR,Ecc-Cup,Ecc-Disc,Label
0,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.7097,0.636027,0.580909,0
1,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.6953,0.575024,0.608413,0
2,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.9629,0.299303,0.262828,0
3,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.7246,0.503186,0.483273,0
4,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.6138,0.500141,0.283467,0


**WARNING: There are missing values (very few, I hope) in the Ecc-Cup column!**

In [140]:
# Impute missing CDR / Ecc-Cup values with 0.5.
# A single vectorised fillna replaces the original row-by-row iterrows() loop;
# only the two listed columns are filled, every other column is left untouched.
MISSING_FEATURE_FILL = {'CDR': 0.5, 'Ecc-Cup': 0.5}
data = data.fillna(value=MISSING_FEATURE_FILL)

In [141]:
# Inspect the last rows of the table after the missing-value fill.
data.tail()

Unnamed: 0,Image,Source,Cropped,CDR,Ecc-Cup,Ecc-Disc,Label
2059,../input/refuge-cropped-data/train_cropped/ima...,REFUGE,True,0.535152,0.557495,0.539155,0
2060,../input/refuge-cropped-data/train_cropped/ima...,REFUGE,True,0.423169,0.259729,0.197245,0
2061,../input/refuge-cropped-data/train_cropped/ima...,REFUGE,True,0.48338,0.264281,0.550415,0
2062,../input/refuge-cropped-data/train_cropped/ima...,REFUGE,True,0.421299,0.306211,0.266774,0
2063,../input/refuge-cropped-data/train_cropped/ima...,REFUGE,True,0.443038,0.269389,0.448677,0


../input/cropped-data/Cropped_Data-20220623T201321Z-001/Cropped_Data/ORIGA/Images/001.jpg

In [142]:
# Inspect the first rows of the table after the missing-value fill.
data.head()

Unnamed: 0,Image,Source,Cropped,CDR,Ecc-Cup,Ecc-Disc,Label
0,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.7097,0.636027,0.580909,0
1,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.6953,0.575024,0.608413,0
2,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.9629,0.299303,0.262828,0
3,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.7246,0.503186,0.483273,0
4,../input/cropped-data/Cropped_Data-20220623T20...,Origa,True,0.6138,0.500141,0.283467,0


## Train Model

In [143]:
# Row labels of `data` for each source dataset, in their original order.
# Boolean masks on the 'Source' column replace the original iterrows() loop;
# rows whose Source matches none of the three names end up in no list.
g1020_inds = data.index[data['Source'] == 'G1020'].tolist()
origa_inds = data.index[data['Source'] == 'Origa'].tolist()
refuge_inds = data.index[data['Source'] == 'REFUGE'].tolist()

In [144]:
# Slice the full table into one DataFrame per source dataset.
origa, g1020, refuge = (
    data.loc[row_labels]
    for row_labels in (origa_inds, g1020_inds, refuge_inds)
)

In [145]:
# Number of Origa samples.
origa.shape[0]

650

In [146]:
# Number of G1020 samples.
g1020.shape[0]

1014

In [147]:
# Number of REFUGE samples.
refuge.shape[0]

400

In [148]:
# Leave-one-dataset-out split: the dataset named by TEST is held out for
# validation and the remaining two are concatenated for training.
TEST = 'REFUGE'

# Insertion order matters: it reproduces the original concatenation order
# (origa before g1020 before refuge) for every choice of TEST.
SOURCE_FRAMES = {'ORIGA': origa, 'G1020': g1020, 'REFUGE': refuge}

# Fail loudly on a typo instead of silently leaving train_df / val_df
# undefined (or stale from a previous run of this cell).
if TEST not in SOURCE_FRAMES:
    raise ValueError(f"TEST must be one of {sorted(SOURCE_FRAMES)}, got {TEST!r}")

val_df = SOURCE_FRAMES[TEST]
# pd.concat replaces DataFrame.append, which is deprecated and was removed
# in pandas 2.0. Row labels are preserved exactly as append() preserved them.
train_df = pd.concat(
    [frame for name, frame in SOURCE_FRAMES.items() if name != TEST]
)

In [149]:
# from sklearn.model_selection import train_test_split

# train_df, val_df = train_test_split(g1020, test_size=0.2, shuffle=True, stratify=g1020['Label']) # switch to shuffle=False later

In [150]:
# origa = data.drop(g1020_inds, axis=0, inplace=False)
# g1020 = data.drop(origa_inds, axis=0, inplace=False)
# refuge = data.drop(refuge_inds, axis=0, inplace=False)

In [151]:
# # train_df = origa
# # val_df = g1020

# train_df = g1020
# val_df = origa

In [152]:
# import cv2 as cv
# img = np.array(load_img('../input/cropped-data/Cropped_Data-20220623T201321Z-001/Cropped_Data/G1020/Images/img/image_0.jpg'))
# clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
# cl1 = clahe.apply(img)

In [153]:
# clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
# cl1 = clahe.apply(img)

In [154]:
def generator(df):
    """Build a zero-argument generator function over the rows of ``df``.

    ``tf.data.Dataset.from_generator`` needs a callable, so this returns a
    closure rather than a generator object.

    Args:
        df: DataFrame with columns 'Image' (file path), 'CDR', 'Ecc-Cup',
            'Ecc-Disc' and 'Label'.

    Returns:
        A callable that, when invoked, yields one
        ``(image, features, label)`` tuple per row of ``df``:
        the image resized to (IMG_SIZE, IMG_SIZE) and divided by 255,
        a length-3 array ``[CDR, Ecc-Cup, Ecc-Disc]``, and a length-1
        array holding the label.
    """
    def callable_generator():
        for _, row in df.iterrows():
            loaded = load_img(row['Image'], target_size=(IMG_SIZE, IMG_SIZE))
            # Divide by 255 to rescale pixel values. The original code also
            # called tf.image.per_image_standardization() here, but its result
            # was overwritten on the very next line, so that dead per-image
            # computation has been removed; the yielded values are unchanged.
            img_arr = np.array(loaded) / 255.
            extracted_feats = np.array([row['CDR'], row['Ecc-Cup'], row['Ecc-Disc']])
            yield img_arr, extracted_feats, np.array([row['Label']])
    return callable_generator

In [155]:
# Stream (image, numerical features, label) triples for the training split.
train_dataset = tf.data.Dataset.from_generator(
    generator(train_df),
    output_types=(tf.float32, tf.float32, tf.float32),
    output_shapes=((IMG_SIZE, IMG_SIZE, 3), (3,), (1,)),
)

In [156]:
BATCH_SIZE = 8


def prep_data(train_dataset, batch_size=BATCH_SIZE, shuffle_buffer=None, seed=None):
    """Cache, (optionally) shuffle, batch, repeat and prefetch a dataset.

    Args:
        train_dataset: unbatched ``tf.data.Dataset`` of samples.
        batch_size: samples per batch. Defaults to ``BATCH_SIZE``.
        shuffle_buffer: if given, shuffle the cached samples with a buffer of
            this size before batching. Defaults to ``None`` (no shuffling),
            which keeps the original behaviour: samples are served in the
            order the generator yields them.
        seed: optional random seed forwarded to ``Dataset.shuffle``.

    Returns:
        An infinitely repeating, batched and prefetched dataset.
    """
    dataset = train_dataset.cache()
    if shuffle_buffer:
        # Shuffle after cache() so the order differs on every pass while the
        # decoded samples themselves are still read from the cache.
        dataset = dataset.shuffle(shuffle_buffer, seed=seed)
    dataset = dataset.batch(batch_size).repeat()
    return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)


train_dataset = prep_data(train_dataset)

In [157]:
# Validation pipeline: same generator over the held-out split, served one
# sample per batch, cached and repeated indefinitely.
test_dataset = (
    tf.data.Dataset.from_generator(
        generator(val_df),
        output_types=(tf.float32, tf.float32, tf.float32),
        output_shapes=((IMG_SIZE, IMG_SIZE, 3), (3,), (1,)),
    )
    .cache()
    .batch(1)
    .repeat()
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

In [158]:
from tensorflow.keras.losses import BinaryCrossentropy
from IPython.display import clear_output

In [159]:
# Show the name of the GPU device TensorFlow can see (empty string if none).
tf.test.gpu_device_name()

2022-06-26 11:47:52.000560: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-26 11:47:52.001179: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-26 11:47:52.001495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-26 11:47:52.001877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-26 11:47:52.002190: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

'/device:GPU:0'

In [160]:
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

In [161]:
# opt = tf.keras.optimizers.Adam(learning_rate=1e-2)
# bce = BinaryCrossentropy()
CLASS_WEIGHTS = None # {0: 0.4, 1: 0.6}
# @tf.function
def train_step(x, y):
    """Run one optimisation step on a single batch.

    Args:
        x: model inputs for the batch.
        y: target labels for the batch.

    Returns:
        Whatever ``model.train_on_batch`` returns for this batch, computed
        with the module-level ``CLASS_WEIGHTS``.
    """
    return model.train_on_batch(x, y, class_weight=CLASS_WEIGHTS)

def _evaluate_validation(decision_threshold=0.5):
    """Score the model on one full pass over the validation split.

    Args:
        decision_threshold: probability above which a sample is predicted
            as class 1.

    Returns:
        Tuple ``(accuracy, auc, confusion_matrix)``. ``auc`` falls back to
        0.5 when ``roc_auc_score`` raises ``ValueError``.
    """
    val_preds_cont = []
    val_preds_disc = []
    val_labels = []
    # test_dataset repeats forever and yields one sample per batch, so
    # take() exactly one pass over the validation rows.
    for a, b, y in test_dataset.take(len(val_df['Image'])):
        pred = model.predict({'Image': a})
        val_preds_cont.append(pred[0][0])
        val_preds_disc.append(int(pred[0][0] > decision_threshold))
        val_labels.append(int(float(y[0][0])))

    acc = accuracy_score(val_labels, val_preds_disc)
    try:
        roc = roc_auc_score(val_labels, val_preds_cont)
    except ValueError:
        # Only the expected failure is swallowed (e.g. a single class present
        # in the labels); anything else should surface instead of being
        # silently reported as chance-level AUC.
        roc = 0.5
    cf = confusion_matrix(val_labels, val_preds_disc)
    return acc, roc, cf


def train_model(n_epochs=1, auc_threshold=0.8):
    """Train ``model`` for up to ``n_epochs`` epochs, validating after each.

    Args:
        n_epochs: maximum number of passes over the training split.
        auc_threshold: training stops early as soon as the validation AUC
            exceeds this value (default 0.8, the previously hard-coded value).

    Returns:
        Tuple ``(accuracies, aucs)``: per-epoch validation accuracy and AUC,
        one entry for every epoch that was actually run.

    NOTE(review): early stopping is decided on the same held-out split that is
    reported as the final result -- confirm this is acceptable.
    """
    # Ceil division so a trailing partial batch is trained on too; the
    # original floor division skipped it. With the sample count a multiple of
    # BATCH_SIZE the two are identical.
    n_train = len(train_df['Image'])
    steps_per_epoch = -(-n_train // BATCH_SIZE)

    end_epoch_accuracies = []
    end_epoch_aucs = []
    for e in range(n_epochs):
        print('Epoch', e+1)
        # train_dataset repeats forever; take() bounds one epoch.
        for a, b, y in train_dataset.take(steps_per_epoch):
            train_step({'Image': a}, y)

        acc, roc, cf = _evaluate_validation()
        print(f'Accuracy: {acc}')
        print(f'AUC: {roc}')
        print(f'Confusion Matrix\n{cf}')
        end_epoch_accuracies.append(acc)
        end_epoch_aucs.append(roc)
        if roc > auc_threshold:
            print('Ending training early...')
            break
    return end_epoch_accuracies, end_epoch_aucs

In [162]:
# Train for at most 20 epochs; train_model stops earlier once the validation
# AUC passes its threshold. Returns per-epoch validation accuracy and AUC.
accs, aucs = train_model(n_epochs=20)

Epoch 1


2022-06-26 11:47:55.793278: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


Accuracy: 0.3875
AUC: 0.7359722222222222
Confusion Matrix
[[119 241]
 [  4  36]]
Epoch 2
Accuracy: 0.7625
AUC: 0.7305555555555556
Confusion Matrix
[[297  63]
 [ 32   8]]
Epoch 3
Accuracy: 0.9075
AUC: 0.8210416666666667
Confusion Matrix
[[360   0]
 [ 37   3]]
Ending training early...


In [163]:
# Validation accuracy after each completed epoch.
accs

[0.3875, 0.7625, 0.9075]

In [164]:
# Validation ROC AUC after each completed epoch.
aucs

[0.7359722222222222, 0.7305555555555556, 0.8210416666666667]

In [165]:
# Fraction of positive labels in the validation split (class balance check).
val_df['Label'].mean()

0.1

In [176]:
# Decision threshold applied to the sigmoid output in the evaluation cells
# below (train_model itself uses 0.5).
THRESH = 0.45

In [177]:
# Re-evaluate the trained model on the validation split using THRESH as the
# decision threshold.
val_preds_cont = []
val_preds_disc = []
val_labels = []
# test_dataset repeats forever with one sample per batch, so take() exactly
# one pass over the validation rows.
for a, b, y in test_dataset.take(len(val_df['Image'])):
    # The model has a single input named 'Image'; the numerical features `b`
    # are not part of this architecture, so they are no longer passed in
    # (consistent with how train_model feeds the model).
    x = {'Image': a}
    pred = model.predict(x)
    val_preds_cont.append(pred[0][0])
    val_preds_disc.append(int(pred[0][0] > THRESH))
    val_labels.append(int(float(y[0][0])))
acc = accuracy_score(val_labels, val_preds_disc)
cf = confusion_matrix(val_labels, val_preds_disc)
try:
    roc = roc_auc_score(val_labels, val_preds_cont)
except ValueError:
    # Only swallow the expected failure (e.g. a single class present in the
    # labels); any other error should surface rather than be reported as 0.5.
    roc = 0.5
print(f'Accuracy: {acc}')
print(f'AUC: {roc}')
print(f'Confusion Matrix\n{cf}')


Accuracy: 0.91
AUC: 0.8210416666666667
Confusion Matrix
[[346  14]
 [ 22  18]]


In [168]:
# Persist the trained model to disk.
# NOTE(review): the path says "origa" but TEST is set to 'REFUGE' in this
# notebook -- confirm the directory name matches the held-out dataset.
model.save('origa_test_model')

In [None]:
# Print per-sample predictions next to the ground truth for a quick look.
# PRINT_PROB=True shows the raw sigmoid output; False shows the class obtained
# by thresholding at THRESH.
PRINT_PROB = False
# Number of validation samples to print. test_dataset repeats, so a value
# larger than the validation split would wrap around to the start.
N_PREVIEW = 333
for a, b, y in test_dataset.take(N_PREVIEW):
    # The model has a single input named 'Image'; the numerical features `b`
    # are not part of this architecture, so they are not passed in.
    pred = model.predict({'Image': a})
    if PRINT_PROB:
        print(pred[0][0], 'Ground Truth:', float(y[0][0]))
    else:
        print(int(pred[0][0]>THRESH), 'Ground Truth:', float(y[0][0]))

0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 1.0
1 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
1 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
1 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 1.0
0 Ground Truth: 0.0
1 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
1 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
1 Ground Truth: 1.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
0 Ground Truth: 0.0
