In [None]:
import re
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import datetime

import scipy
import gc

# Record the TensorFlow and TFDS versions this run was executed with.
for _module in (tf, tfds):
    print(_module.__version__)

# Load And Split

In [None]:
# Directory holding the already-prepared TFDS data; handed to tfds.load as
# data_dir (with download=False), so nothing is fetched at run time.
manual_dir = '/kaggle/input/retinopathy-btgraham300/tensorflow_datasets'

In [None]:
# Load the three predefined splits from the local copy. Examples stay as
# feature dictionaries (as_supervised=False) and files are read in a fixed
# order (shuffle_files=False).
splits, ds_info = tfds.load(
    'diabetic_retinopathy_detection/btgraham-300:3.0.0',
    split=['train', 'validation', 'test'],
    data_dir=manual_dir,
    download=False,
    shuffle_files=False,
    as_supervised=False,
    with_info=True,
)
ds_train, ds_val, ds_test = splits

In [None]:
# Print the dataset metadata object that tfds.load returned with the splits.
print(ds_info)

# EDA

In [None]:
# Display a sample of training examples with the TFDS visualization helper;
# whatever the helper returns is kept in `vis`.
vis = tfds.visualization.show_examples(ds_train, ds_info)

In [None]:
# Grab one training example and convert its image and label to NumPy values.
for tensor in ds_train.take(1):
    image, label = (tensor[key].numpy() for key in ('image', 'label'))

In [None]:
# Show the single example extracted from ds_train; "Etiqueta" is Spanish for "label".
print("Etiqueta: ",label)
# The trailing semicolon stops the notebook from echoing imshow's return value.
plt.imshow(image);

In [None]:
# Walk the whole training split once, keeping only each example's label.
label_list = []
for example in ds_train.as_numpy_iterator():
    label_list.append(example['label'])

# Per-class frequencies: bar chart plus the raw (class, count) pairs.
unique, counts = np.unique(label_list, return_counts=True)
plt.bar(unique, counts)
print(*zip(unique, counts))

# Class weights

In [None]:
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight

# Class ids that actually occur in the training labels, in sorted order.
classes = np.unique(label_list)

# scikit-learn >= 1.0 accepts these arguments by keyword only; the keyword
# form also works on older releases.
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=label_list)
sample_weights = compute_sample_weight(class_weight='balanced', y=label_list)

# Key each weight by its real class id instead of its position, so the mapping
# passed to model.fit stays correct even if a class id is absent from training.
class_weights_dict = dict(zip(classes.tolist(), class_weights.tolist()))

In [None]:
# One weight per training label is expected here.
len(sample_weights)

In [None]:
# Occurrences of each integer label, indexed by label value.
y = np.bincount(label_list)
# Label values that occur at least once.
(ii,) = np.nonzero(y)
# (label, count) pairs for the labels that are present.
list_zip = list(zip(ii, y[ii]))
list_zip

In [None]:
# Display the raw per-label counts computed with np.bincount.
y

In [None]:
# Sanity check: count * weight per class. With 'balanced' weights every product
# should come out equal. Iterate over every class that is present (`ii`, which
# lines up with the order of `class_weights`) rather than a hard-coded range(4),
# which silently dropped the last of the five classes.
[y[class_id] * weight for class_id, weight in zip(ii, class_weights)]
    

# Preprocessing

In [None]:
def transform_images(row, size, reescale=True, num_classes=5):
    """Resize one dataset example and one-hot encode its label.

    Args:
        row: Feature dictionary with at least an ``'image'`` and a ``'label'`` entry.
        size: Target side length; the image is resized to ``(size, size)``.
        reescale: When true, divide the resized pixel values by 255. The
            spelling is kept because callers pass this argument by keyword.
        num_classes: Length of the one-hot label vector (defaults to 5).

    Returns:
        A tuple ``(image, one_hot_label)``.
    """
    image = tf.image.resize(row['image'], (size, size))
    if reescale:
        image = image / 255
    return image, tf.one_hot(row['label'], depth=num_classes)


def transform_images_complete(row, size, num_classes=5):
    """Same as ``transform_images`` with rescaling on, plus the example's name.

    Args:
        row: Feature dictionary with ``'image'``, ``'label'`` and ``'name'`` entries.
        size: Target side length; the image is resized to ``(size, size)``.
        num_classes: Length of the one-hot label vector (defaults to 5).

    Returns:
        A tuple ``(image, one_hot_label, name)``.
    """
    # Delegate so both variants share one resize / rescale / encode path.
    image, one_hot_label = transform_images(
        row, size, reescale=True, num_classes=num_classes)
    return image, one_hot_label, row['name']

def _to_model_input(row):
    """Apply transform_images with a 400-pixel side and its default rescaling."""
    return transform_images(row, 400)


# Every split goes through the same preprocessing.
ds_train = ds_train.map(_to_model_input)
ds_val = ds_val.map(_to_model_input)
ds_test = ds_test.map(_to_model_input)

In [None]:
# Class count and names as declared in the dataset metadata.
label_feature = ds_info.features['label']
print(f"Num classes: {label_feature.num_classes}")
print(f"Class names: {label_feature.names}")

In [None]:
def _count(dataset):
    """Return the dataset's cardinality as a NumPy scalar."""
    return tf.data.experimental.cardinality(dataset).numpy()


# The splits are still unbatched at this point, so these are element counts.
NUM_TRAIN_IMAGES = _count(ds_train)
print(f"Num training images: {NUM_TRAIN_IMAGES}")

NUM_VAL_IMAGES = _count(ds_val)
print(f"Num validating images: {NUM_VAL_IMAGES}")

NUM_TEST_IMAGES = _count(ds_test)
print(f"Num testing images: {NUM_TEST_IMAGES}")

In [None]:
# Let tf.data choose the prefetch depth so input preparation overlaps with
# the training / inference step.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Only the training split is shuffled (1000-element buffer) before batching.
ds_train = ds_train.shuffle(1000).batch(32).prefetch(AUTOTUNE)

# Validation metrics do not depend on example order, so the former
# shuffle(1000) only cost memory (a buffer of 1000 resized float images) and
# start-up time; it is dropped.
ds_val = ds_val.batch(32).prefetch(AUTOTUNE)

# Test order must stay fixed: predictions are later compared position by
# position with labels read in a second pass over this dataset.
ds_test = ds_test.batch(32).prefetch(AUTOTUNE)

In [None]:
# Peek at one (images, one-hot labels) batch to confirm the pipeline output.
for batch in ds_train.take(1):
    print(batch)

# Build Model

In [None]:


from tensorflow.keras.layers import Input # Input Layer
from tensorflow.keras.applications import DenseNet121 # Keras Application
from tensorflow.keras.layers import Dense # Dense Layer (Fully connected)
from tensorflow.keras.models import Model # Model Structure



# Must match the size the images are resized to during preprocessing.
input_shape = (400, 400, 3)
img_input = Input(shape=input_shape)

# ImageNet-initialised DenseNet121 without its classification head; the
# backbone output is max-pooled.
base_model = DenseNet121(
    weights='imagenet',
    include_top=False,
    input_tensor=img_input,
    input_shape=input_shape,
    pooling="max",
)
# The whole backbone is trained together with the new head.
base_model.trainable = True

# New 5-way softmax head on top of the pooled backbone features.
x = base_model.output
predictions = Dense(5, activation="softmax", name="predictions")(x)

model = Model(inputs=img_input, outputs=predictions)

In [None]:
# The metric names double as the keys read from history.history and as the
# labels printed next to the evaluate() results, so keep them in sync.
eval_metrics = [
    tf.keras.metrics.CategoricalAccuracy(name="cat_acc"),
    tf.keras.metrics.AUC(name='auc'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.Precision(name='precision'),
]

model.compile(
    # Ten times smaller than Adam's default learning rate of 0.001.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=eval_metrics,
)

# Train Model

In [None]:
# Lower the learning rate after 4 epochs without improvement.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(patience=4, verbose=1)
# Keep only the best model seen so far on disk.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='bestmodel.h5', save_best_only=True, verbose=1)

history = model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=20,
    # Per-class loss weights derived from the training label frequencies.
    class_weight=class_weights_dict,
    # NOTE(review): Keras is understood to ignore `shuffle` for tf.data inputs;
    # the actual shuffling comes from ds_train.shuffle(...) — confirm.
    shuffle=True,
    callbacks=[lr_on_plateau, best_checkpoint],
)

# Plot model training

In [None]:
import matplotlib.pyplot as plt
# Per-epoch training / validation series recorded by model.fit.
recorded = history.history
auc, val_auc = recorded['auc'], recorded['val_auc']
loss, val_loss = recorded['loss'], recorded['val_loss']
cat_acc, val_cat_acc = recorded['cat_acc'], recorded['val_cat_acc']

epochs = range(len(auc))

# One entry per panel: (title, y-limits, [(series, line format, legend label), ...]).
panels = [
    ('Training and validation AUC', (0, 1),
     [(auc, 'r', 'Training auc'), (val_auc, 'b', 'Validation auc')]),
    ('Training and validation Loss', (0, 2),
     [(loss, 'y-.', 'Training loss'), (val_loss, 'g-.', 'Validation loss')]),
    ('Training and validation cat_acc', (0, 1),
     [(cat_acc, 'c-.', 'Training cat_acc'), (val_cat_acc, 'g', 'Validation cat_acc')]),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 4.8))
for ax, (panel_title, y_limits, curves) in zip(axes, panels):
    for series, line_format, curve_label in curves:
        ax.plot(epochs, series, line_format, label=curve_label)
    ax.set_ylim(*y_limits)
    ax.set_title(panel_title)
    ax.legend(loc=0)

plt.show()

# Only use the best weights for the model.

In [None]:
# Reload the model file written by the ModelCheckpoint callback
# (save_best_only=True) during training.
best_model = tf.keras.models.load_model('bestmodel.h5')

# Preds and Evaluation

In [None]:
# Run the reloaded model over the batched test split, which is never shuffled.
preds = best_model.predict(ds_test, verbose=1)

In [None]:
# Evaluate the reloaded model on the test split; the result is printed in the
# next cell paired with the loss / metric names.
evaluation_model = best_model.evaluate(ds_test, verbose=1)

In [None]:
# Labels listed in the order used at compile time: the loss first, then the metrics.
metric_labels = ['loss', 'cat_acc', 'auc', 'recall', 'precision']
print(*zip(evaluation_model, metric_labels))

In [None]:
# Inspect the raw model output for the test example at index 1.
preds[1]

In [None]:
# Collapse the 2-D array of per-class scores from predict() into one class id
# per example. The ndim guard makes this cell safe to re-run: without it, a
# second run would take argmax of each already-collapsed scalar and silently
# turn every prediction into class 0.
_scores = np.asarray(preds)
if _scores.ndim == 2:
    preds = list(np.argmax(_scores, axis=1))

In [None]:
# Number of predicted labels; should match the number of test images.
len(preds)

In [None]:
# Undo the batching so the test split can be read one example at a time.
# NOTE(review): this rebinds ds_test, so the cell is not idempotent — running
# it twice applies unbatch() to the already-unbatched dataset.
ds_test = ds_test.unbatch()

In [None]:
# Recover the integer class of every test example from its one-hot label
# (the second element of each (image, label) pair).
actuals = [
    np.argmax(one_hot_label)
    for _image, one_hot_label in ds_test.as_numpy_iterator()
]

In [None]:
# Number of ground-truth labels; expected to equal len(preds).
len(actuals)

In [None]:
# Side-by-side look at the first 30 predicted and true class ids.
for tag, values in (("preds:", preds), ("trues:", actuals)):
    print(tag, values[:30])

In [None]:
# Balanced per-example weights computed from the TEST labels; this rebinds
# the `sample_weights` name that earlier held the training-set weights.
sample_weights = compute_sample_weight(class_weight='balanced', y=actuals)

In [None]:
# Quadratic-weighted Cohen's kappa over integer class ids (sparse_labels=True).
m = tfa.metrics.CohenKappa(num_classes=5, sparse_labels=True, weightage="quadratic")
# NOTE(review): the balanced per-example weights computed from `actuals` are
# passed as sample_weight, so this is a class-balanced kappa rather than the
# unweighted statistic — confirm that this is intended.
m.update_state(actuals, preds, sample_weight=sample_weights)
print('Final result: ', m.result().numpy())

In [None]:
from sklearn.metrics import (classification_report, confusion_matrix,
                             f1_score, mean_squared_error)

try:
    # Removed in scikit-learn 1.2 and not called by this cell. Import it
    # separately so that its absence cannot abort the imports above, which
    # the report here and the confusion-matrix cell depend on.
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    pass

# Per-class precision / recall / F1 for the five classes on the test split.
target_names = ['class 0', 'class 1', 'class 2', 'class 3', 'class 4']
print(classification_report(actuals, preds, target_names=target_names))

In [None]:
# Confusion matrix on the test split, called as (true labels, predicted labels);
# under the scikit-learn convention rows are true classes and columns are predictions.
confusion_matrix(actuals, preds)