In [None]:
import pandas as pd
import numpy as np
from PIL import Image
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from keras.models import Model, Sequential, load_model, Input
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization, LeakyReLU
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.utils import plot_model
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
import datetime

In [None]:
# --- Tunable settings ------------------------------------------------------
IMAGE_PATH = "images/"       # directory containing Train_*.jpg / Test_*.jpg
IMAGE_SIZE = 80              # every image is resized to IMAGE_SIZE x IMAGE_SIZE
FILTERS = 64                 # filter count used by the first convolutional block
EPOCHS = 300                 # maximum number of epochs passed to training
REGULARIZER_VALUE = 0.001    # not referenced by any cell visible in this notebook

# --- Tabular metadata ------------------------------------------------------
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [None]:
# Label table: every column of the training CSV except the image identifier.
target = train_df.drop(columns="image_id")
# Test image identifiers, reused later as the first column of the submission.
test_ids = test_df["image_id"]

In [None]:
# Row counts of the two CSVs; they size the pre-allocated image arrays below.
train_len, test_len = len(train_df), len(test_df)

In [None]:
# Load every training image into one pre-allocated array of shape
# (train_len, IMAGE_SIZE, IMAGE_SIZE, 3), with pixel values scaled to [0, 1].
train_images = np.empty((train_len, IMAGE_SIZE, IMAGE_SIZE, 3))
for i in tqdm(range(train_len)):
    # The context manager releases the file handle as soon as the pixels are
    # read. convert("RGB") guarantees three channels, so a grayscale/RGBA/CMYK
    # file no longer fails when assigned into the (H, W, 3) slot.
    with Image.open(IMAGE_PATH + "Train_{}.jpg".format(i)) as img:
        rgb = img.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))
    train_images[i] = np.asarray(rgb) / 255

In [None]:
# Load every test image into one pre-allocated array of shape
# (test_len, IMAGE_SIZE, IMAGE_SIZE, 3), with pixel values scaled to [0, 1].
test_images = np.empty((test_len, IMAGE_SIZE, IMAGE_SIZE, 3))
for i in tqdm(range(test_len)):
    # The context manager releases the file handle as soon as the pixels are
    # read. convert("RGB") guarantees three channels, so a grayscale/RGBA/CMYK
    # file no longer fails when assigned into the (H, W, 3) slot.
    with Image.open(IMAGE_PATH + "Test_{}.jpg".format(i)) as img:
        rgb = img.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))
    test_images[i] = np.asarray(rgb) / 255

In [None]:
# Sanity check: each array should be (n_images, IMAGE_SIZE, IMAGE_SIZE, 3).
(train_images.shape, test_images.shape)

In [None]:
# Training callbacks. No `monitor` argument is given, so each one watches
# whatever quantity the installed Keras version uses by default.
rlr = ReduceLROnPlateau(patience=15, verbose=1)
es = EarlyStopping(patience=35, restore_best_weights=True, verbose=1)
mc = ModelCheckpoint('model.hdf5', save_best_only=True, verbose=0)

NUM_CONV_BLOCKS = 5

# Use a local counter instead of mutating the FILTERS constant. The original
# `FILTERS *= 2` made this cell non-idempotent: every re-run started from a
# filter count 32x larger than the previous run.
filters = FILTERS

model = Sequential()
for i in range(NUM_CONV_BLOCKS):
    # Only the very first layer of a Sequential model needs the input shape.
    if i == 0:
        model.add(Conv2D(filters=filters, kernel_size=3, padding='SAME',
                         input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
    else:
        model.add(Conv2D(filters=filters, kernel_size=3, padding='SAME'))
    model.add(LeakyReLU())
    model.add(Conv2D(filters=filters, kernel_size=3, padding='SAME'))
    model.add(LeakyReLU())
    # Every block except the last gets an extra 5x5 convolution.
    if i < NUM_CONV_BLOCKS - 1:
        model.add(Conv2D(filters=filters, kernel_size=5, padding='SAME'))
        model.add(LeakyReLU())
    model.add(MaxPooling2D())
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
    # Double the width for the next block: FILTERS, 2*FILTERS, 4*FILTERS, ...
    filters *= 2

# Classification head: small dense layer followed by a 4-way softmax.
model.add(Flatten())
model.add(Dense(16, activation="relu"))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(4, activation="softmax"))
model.summary()

In [None]:
# Hold out 20% of the labelled images for validation; the fixed seed keeps the
# split identical between runs.
x_train, x_val, y_train, y_val = train_test_split(
    train_images,
    target.to_numpy(),
    test_size=0.2,
    random_state=64378538,
)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported alongside the loss.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Random geometric augmentation; it is applied only to the training flow,
# the validation arrays are passed to Keras untouched.
augmentation_args = dict(
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
imagegen = ImageDataGenerator(**augmentation_args)

batch_size = 32
train_flow = imagegen.flow(x_train, y_train, batch_size=batch_size)

# NOTE(review): fit_generator is reported as deprecated in newer Keras /
# TensorFlow releases in favour of fit — confirm against the installed version.
history = model.fit_generator(
    train_flow,
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
    callbacks=[rlr, es, mc],
    verbose=1,
)

# Replace the in-memory model with the checkpoint written by ModelCheckpoint.
model = load_model('model.hdf5')

In [None]:
from matplotlib import pyplot as plt

h = history.history
print(h.keys())

# The loss panel skips the first `offset` epochs; the accuracy panel below
# plots every recorded epoch.
offset = 5
epochs = range(offset, len(h['loss']))

fig = plt.figure(1, figsize=(20, 6))

# Left panel: training vs. validation loss.
loss_ax = fig.add_subplot(121)
loss_ax.set_xlabel('epochs')
loss_ax.set_ylabel('loss')
for key, label in (('loss', 'train'), ('val_loss', 'val')):
    loss_ax.plot(epochs, h[key][offset:], label=label)
loss_ax.legend()

# Right panel: training vs. validation accuracy.
acc_ax = fig.add_subplot(122)
acc_ax.set_xlabel('epochs')
acc_ax.set_ylabel('accuracy')
for key, label in (('accuracy', 'train'), ('val_accuracy', 'val')):
    acc_ax.plot(h[key], label=label)
acc_ax.legend()

plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

# One-vs-rest ROC AUC for each of the 4 output columns on the validation
# split, followed by their unweighted mean.
pred_test = model.predict(x_val)
class_scores = []
for class_idx in range(4):
    class_auc = roc_auc_score(y_val[:, class_idx], pred_test[:, class_idx])
    class_scores.append(class_auc)
    print(f'{class_auc:.3f}')

roc_sum = sum(class_scores) / 4
print(f'totally:{roc_sum:.3f}')

In [None]:
# Predict class probabilities for the test images and write the submission.
pred = model.predict(test_images)

# One probability column per model output, in the order of the softmax units.
res = pd.DataFrame({
    'image_id': test_ids,
    'healthy': pred[:, 0],
    'multiple_diseases': pred[:, 1],
    'rust': pred[:, 2],
    'scab': pred[:, 3],
})

# The original formatted the `datetime.fromtimestamp` method object itself into
# the file name (yielding "<built-in method ...>"); use a real timestamp so
# each run gets a distinct, filesystem-safe name.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
res.to_csv('submission_id_{}.csv'.format(timestamp), index=False)
res.head(40)