# Train Diabetic Retinopathy Model

In [None]:
# Dataset locations; the trailing slash matters because later cells append
# sub-directory names by plain string concatenation.
base_dir = '../data/'
train_dir = f'{base_dir}train/'
validation_dir = f'{base_dir}test/'

# Input image geometry: square 540x540 images with 3 channels.
img_width = img_height = 540
img_channel = 3

### Gerando dados artificialmente

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

### Montando o modelo

In [None]:
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from keras.models import Model

# Convolutional feature extractor: four Conv2D(3x3, relu) + MaxPooling2D(2x2)
# stages with 32, 64, 128 and 128 filters respectively.
inputs = Input(shape=(img_height, img_width, img_channel))
X = inputs
for n_filters in (32, 64, 128, 128):
    X = Conv2D(n_filters, (3, 3), activation='relu')(X)
    X = MaxPooling2D((2, 2))(X)

# Classification head: flatten, dropout, one hidden dense layer, and a
# 5-way softmax output.
X = Flatten()(X)
X = Dropout(0.5)(X)
X = Dense(512, activation='relu')(X)
outputs = Dense(5, activation='softmax')(X)

model = Model(inputs=inputs, outputs=outputs)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

### Processamento de dados

In [None]:
from keras_preprocessing.image import ImageDataGenerator

# Both generators multiply pixel values by 1/255.
pixel_scale = 1. / 255

# Random augmentations, applied to the training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Stream augmented training batches from the class sub-directories of train_dir.
# Keras expects target_size as (height, width), matching the model's
# Input(shape=(img_height, img_width, img_channel)).
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=100,
    class_mode='categorical')

In [None]:
# Stream rescaled (non-augmented) validation batches from validation_dir.
# Keras expects target_size as (height, width), matching the model's
# Input(shape=(img_height, img_width, img_channel)).
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(img_height, img_width),
    batch_size=100,
    class_mode='categorical')

In [None]:
# Pull a single batch from the training generator as a quick sanity check.
data = next(train_generator)

# Check Class Weight

In [None]:
import os

# Number of training files found in each class sub-directory ('0' .. '4').
class_counts = [len(os.listdir(train_dir + label)) for label in ('0', '1', '2', '3', '4')]
len_0, len_1, len_2, len_3, len_4 = class_counts

len_tot = sum(class_counts)

# Share of the training set held by each class. These are informational only:
# the class_weight dictionary below does not use them.
w0, w1, w2, w3, w4 = [count * 1. / len_tot for count in class_counts]

# Hard-coded per-class loss weights passed to training.
# NOTE(review): these look like precomputed inverse-frequency ("balanced")
# weights - confirm they still match the current dataset.
class_weight = {
    0: 0.46182373,
    1: 1.33303985,
    2: 0.62618633,
    3: 3.90590631,
    4: 4.31936937,
}

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once the monitored quantity has failed to improve for 20 epochs.
earlystopper = EarlyStopping(patience=20, verbose=1)

model_path = os.path.join('models', 'diabetic_retinopathy_model.h5')
# Create the checkpoint directory up front so that saving the first
# checkpoint cannot fail on a missing folder.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Only overwrite the file on disk when the monitored quantity improves.
checkpointer = ModelCheckpoint(model_path, verbose=1, save_best_only=True)

In [None]:
from keras import callbacks
from keras.callbacks import TensorBoard
import time

# Number of whole batches per pass; the floor division drops any trailing
# partial batch.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

# Train for up to 200 epochs; early stopping and best-only checkpointing are
# handled by the callbacks, and class_weight is applied to the loss.
fit_options = dict(
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=200,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALID,
    class_weight=class_weight,
    callbacks=[earlystopper, checkpointer],
)
history = model.fit_generator(train_generator, **fit_options)


# Plot Results

In [None]:
# Keras releases before 2.3 log the 'accuracy' metric under 'acc'/'val_acc';
# later releases use 'accuracy'/'val_accuracy'. Pick whichever key is present
# so this cell works with either.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Accuracy curves: dots for training, solid line for validation.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

# Loss curves on a second figure, same marker convention.
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

# Carregando o modelo
- Este modelo leva mais de 10 minutos para ser treinado em uma GPU NVIDIA GTX 1060 / 980 Ti
- Para a aula vamos simplesmente carregar o modelo salvo (pesos e vieses)

In [None]:
from keras import models

# Load the checkpoint written by this notebook's ModelCheckpoint callback.
# The previous path pointed at a pneumonia model on a Google Drive mount,
# which is a different model and an absolute, machine-specific location.
model_path = os.path.join('models', 'diabetic_retinopathy_model.h5')

model = models.load_model(model_path)

# Predict Test Base

In [None]:
# Test images are only rescaled by 1/255; no augmentation is configured.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
batch_size = 1000

# Feed the model the number of channels it was built for instead of always
# forcing grayscale: a 1-channel model gets grayscale images, anything else RGB.
# NOTE(review): assumes a single-input, channels-last model, i.e.
# model.input_shape == (None, height, width, channels) - confirm.
test_color_mode = 'grayscale' if model.input_shape[-1] == 1 else 'rgb'

# Keras expects target_size as (height, width).
test_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(img_height, img_width),
    color_mode=test_color_mode,
    batch_size=batch_size,
    class_mode='categorical')

# Take one batch (up to batch_size images) as the evaluation sample.
valid_X, valid_Y = next(test_generator)

In [None]:
#test_loss, test_acc = model.evaluate(next(test_generator))

In [None]:
# batch_size here is the number of samples per forward pass. The previous
# expression (n // generator batch size) is a step count, not a batch size,
# and evaluates to 0 when the test set holds fewer images than one generator batch.
pred_Y = model.predict(valid_X, batch_size=32, verbose=True)

In [None]:
# Display the raw outputs returned by model.predict for the sampled batch.
pred_Y

# Metrics

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Collapse the one-hot targets and the per-class model outputs to class indices once.
true_labels = np.argmax(valid_Y, -1)
predicted_labels = np.argmax(pred_Y, -1)

# Confusion matrix rendered as an image, followed by the per-class text report.
plt.matshow(confusion_matrix(true_labels, predicted_labels))
print(classification_report(true_labels, predicted_labels))

In [None]:
# Raw confusion-matrix counts (rows: true class index, columns: predicted class index).
confusion_matrix(valid_Y.argmax(axis=-1), pred_Y.argmax(axis=-1))

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
# One-vs-rest ROC for class index 0: a sample is positive when its true label
# is class 0, and it is scored by the model's output for that class.
is_class_0 = np.argmax(valid_Y, -1) == 0
score_class_0 = pred_Y[:, 0]

fpr, tpr, _ = roc_curve(is_class_0, score_class_0)
auc_class_0 = roc_auc_score(is_class_0, score_class_0)

fig, ax1 = plt.subplots(1, 1, figsize=(5, 5), dpi=150)
ax1.plot(fpr, tpr, 'b.-', label='Model (AUC:%2.2f)' % auc_class_0)
# Diagonal reference line: the expected curve of a random classifier.
ax1.plot(fpr, fpr, 'k-', label='Random Guessing')
ax1.legend(loc=4)
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
# The old title ('Lung Opacity') was left over from a different notebook.
ax1.set_title('Class 0 vs. Rest ROC Curve');