# Loading packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from collections import Counter

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.applications import EfficientNetB4
# from efficientnet.keras import EfficientNetB4

from tensorflow.keras.optimizers import Adam

# ignoring warnings
import warnings
warnings.simplefilter("ignore")

import os, cv2, json
from PIL import Image
from tf_bi_tempered_loss import BiTemperedLogisticLoss


# First look at the data

In [2]:
# Count the entries in the training image directory and report the total.
n_train_images = len(os.listdir("train_images"))
print('Train images: %d' % n_train_images)

Train images: 21397


In [3]:
# Pretty-print the mapping from numeric label to disease name.
with open("label_num_to_disease_map.json") as map_file:
    label_map = json.load(map_file)
    print(json.dumps(label_map, indent=4))

{
    "0": "Cassava Bacterial Blight (CBB)",
    "1": "Cassava Brown Streak Disease (CBSD)",
    "2": "Cassava Green Mottle (CGM)",
    "3": "Cassava Mosaic Disease (CMD)",
    "4": "Healthy"
}


In [4]:
# Load the image_id -> label table; the data generators further down are built from it.
train_labels = pd.read_csv("train.csv")
train_labels.head()

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3


In [5]:
# Number of images per class label (most frequent first).
train_labels["label"].value_counts()

3    13158
4     2577
2     2386
1     2189
0     1087
Name: label, dtype: int64

# Preparation for modeling

In [6]:
# Clear the Keras backend session so the model built below starts from a clean global state.
tf.keras.backend.clear_session()

In [7]:
# Main parameters
BATCH_SIZE = 16
EPOCHS = 20
TARGET_SIZE = 380     # images are resized to TARGET_SIZE x TARGET_SIZE pixels

# Fraction of rows held out for validation. Must stay equal to the
# `validation_split` passed to the ImageDataGenerator instances.
VALIDATION_SPLIT = 0.2

# Subset sizes: the validation subset gets int(n * split) rows and the
# training subset gets the rest (4279 / 17118 for the 21397 rows here, which
# is what flow_from_dataframe reports).
_n_validation = int(len(train_labels) * VALIDATION_SPLIT)
_n_training = len(train_labels) - _n_validation

# Keras documents steps_per_epoch / validation_steps as integers. Use ceiling
# division so the final, partially filled batch is still counted and every
# sample is seen once per epoch.
STEPS_PER_EPOCH = (_n_training + BATCH_SIZE - 1) // BATCH_SIZE
VALIDATION_STEPS = (_n_validation + BATCH_SIZE - 1) // BATCH_SIZE

## Image data generators

In [8]:
# flow_from_dataframe with class_mode="categorical" expects the label column
# to hold strings, so cast the integer labels first.
train_labels.label = train_labels.label.astype('str')

# Random augmentations applied to training images only.
augmentation_options = {
    "rotation_range": 30,
    "zoom_range": 0.15,
    "horizontal_flip": True,
    "vertical_flip": True,
    "brightness_range": [0.4, 1.4],
    "fill_mode": 'nearest',
}

# Settings shared by the training and validation iterators.
shared_flow_options = {
    "directory": "train_images",
    "x_col": "image_id",
    "y_col": "label",
    "target_size": (TARGET_SIZE, TARGET_SIZE),
    "batch_size": BATCH_SIZE,
    "class_mode": "categorical",
}

# Training iterator: augmented images from the "training" part of the split.
train_datagen = ImageDataGenerator(validation_split = 0.20, **augmentation_options)
train_generator = train_datagen.flow_from_dataframe(
    train_labels,
    subset = "training",
    **shared_flow_options)

# Validation iterator: un-augmented images from the "validation" part of the
# same 80/20 split.
validation_datagen = ImageDataGenerator(validation_split = 0.20)
validation_generator = validation_datagen.flow_from_dataframe(
    train_labels,
    subset = "validation",
    **shared_flow_options)

Found 17118 validated image filenames belonging to 5 classes.
Found 4279 validated image filenames belonging to 5 classes.


In [9]:
# Per-class sample counts of the validation subset (keys are the generator's
# class indices). `Counter` comes from the notebook's top import cell.
items = Counter(validation_generator.classes)
items

Counter({0: 200, 3: 2638, 1: 436, 2: 495, 4: 510})

In [10]:
# Per-class sample counts of the training subset (keys are the generator's
# class indices). Note: this rebinds `items` from the previous cell.
items = Counter(train_generator.classes)
items

Counter({3: 10520, 4: 2067, 2: 1891, 1: 1753, 0: 887})

# Modeling

In [11]:
def create_model():
    """Build and compile the cassava leaf disease classifier.

    The network is an EfficientNetB4 backbone (ImageNet weights, classifier
    head removed, all layers frozen) followed by a Flatten layer and a 5-way
    softmax Dense layer. It is compiled with Adam (learning rate 1e-4), the
    bi-tempered logistic loss and two metrics reported as "acc" and "auc".

    Returns:
        A compiled Keras ``Model`` taking images of shape
        (TARGET_SIZE, TARGET_SIZE, 3).
    """
    conv_base = EfficientNetB4(include_top = False, weights = 'imagenet',
                               input_shape = (TARGET_SIZE, TARGET_SIZE, 3))
    # Freeze the backbone: only the new Dense head is trained.
    conv_base.trainable = False

    features = layers.Flatten()(conv_base.output)
    predictions = layers.Dense(5, activation = "softmax")(features)
    model = models.Model(conv_base.input, predictions)

    # NOTE(review): the model outputs softmax probabilities; confirm that
    # BiTemperedLogisticLoss expects probabilities rather than raw logits.
    model.compile(
        # `lr` is a deprecated alias; `learning_rate` is the supported name.
        optimizer = Adam(learning_rate = 0.0001),
        loss = BiTemperedLogisticLoss(t1 = 0.6, t2 = 1.2, label_smoothing = 0.1),
        # Name the AUC metric explicitly: an unnamed metric is auto-renamed
        # (e.g. "auc_1") when create_model() is called again in the same
        # session, which would break callbacks monitoring "val_auc".
        metrics = ["acc", tf.keras.metrics.AUC(name = "auc")])
    return model

In [12]:
# Build the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

______________
block6d_add (Add)               (None, 12, 12, 272)  0           block6d_drop[0][0]               
                                                                 block6c_add[0][0]                
__________________________________________________________________________________________________
block6e_expand_conv (Conv2D)    (None, 12, 12, 1632) 443904      block6d_add[0][0]                
__________________________________________________________________________________________________
block6e_expand_bn (BatchNormali (None, 12, 12, 1632) 6528        block6e_expand_conv[0][0]        
__________________________________________________________________________________________________
block6e_expand_activation (Acti (None, 12, 12, 1632) 0           block6e_expand_bn[0][0]          
__________________________________________________________________________________________________
block6e_dwconv (DepthwiseConv2D (None, 12, 12, 1632) 40800       block6e_expand_activation[0][

In [13]:
# Where the best weights (by validation AUC) are written during training.
checkpoint_path = 'Models/Imagenet/Imagenet_Bitempered_EffNetB4_380_16_best_weights.h5'
# Make sure the target directory exists: writing the HDF5 file can fail on a
# fresh checkout where it has not been created yet.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok = True)

# Keep only the weights of the epoch with the highest validation AUC.
model_save = ModelCheckpoint(checkpoint_path,
                             save_best_only = True,
                             save_weights_only = True,
                             monitor = 'val_auc',
                             mode = 'max', verbose = 1)

# Stop after 2 epochs without a val_auc improvement and roll back to the best weights.
early_stop = EarlyStopping(monitor = 'val_auc',
                           patience = 2, mode = 'max', verbose = 1,
                           restore_best_weights = True)

# Divide the learning rate by 10 after a single epoch without val_auc improvement.
reduce_lr = ReduceLROnPlateau(monitor = 'val_auc', factor = 0.1,
                              patience = 1,
                              mode = 'max', verbose = 1)

history = model.fit(
    train_generator,
    steps_per_epoch = STEPS_PER_EPOCH,
    epochs = EPOCHS,
    validation_data = validation_generator,
    validation_steps = VALIDATION_STEPS,
    callbacks = [model_save, early_stop, reduce_lr])

Epoch 1/20
Epoch 00001: val_auc improved from -inf to 0.79170, saving model to Models/Imagenet\Imagenet_Bitempered_EffNetB4_380_16_best_weights.h5
Epoch 2/20
Epoch 00002: val_auc did not improve from 0.79170

Epoch 00002: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 3/20
Epoch 00003: val_auc improved from 0.79170 to 0.80137, saving model to Models/Imagenet\Imagenet_Bitempered_EffNetB4_380_16_best_weights.h5
Epoch 4/20
Epoch 00004: val_auc improved from 0.80137 to 0.80595, saving model to Models/Imagenet\Imagenet_Bitempered_EffNetB4_380_16_best_weights.h5
Epoch 5/20
Epoch 00005: val_auc improved from 0.80595 to 0.80833, saving model to Models/Imagenet\Imagenet_Bitempered_EffNetB4_380_16_best_weights.h5
Epoch 6/20
Epoch 00006: val_auc improved from 0.80833 to 0.81481, saving model to Models/Imagenet\Imagenet_Bitempered_EffNetB4_380_16_best_weights.h5
Epoch 7/20
Epoch 00007: val_auc improved from 0.81481 to 0.81691, saving model to Models/Imagenet\Imagenet_Bite

KeyboardInterrupt: 

In [None]:
# Plot the per-epoch accuracy and loss curves recorded by model.fit().
# Requires `history`, i.e. the training cell must have run to completion.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

# Set the seaborn style BEFORE creating the figure; a style change does not
# affect axes that already exist.
sns.set_style("white")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle('Train history', size = 15)

# Accuracy: dots for training, solid line for validation (both blue).
ax1.plot(epochs, acc, "o", color = 'blue', label = "Training acc")
ax1.plot(epochs, val_acc, "-", color = 'blue', label = "Validation acc")
ax1.set_title("Training and validation acc")
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Accuracy")
ax1.legend()

# Loss: same marker/line convention in red. The format strings no longer carry
# a colour, so they do not conflict with the explicit `color` keyword.
ax2.plot(epochs, loss, "o", color = 'red', label = "Training loss")
ax2.plot(epochs, val_loss, "-", color = 'red', label = "Validation loss")
ax2.set_title("Training and validation loss")
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Loss")
ax2.legend()

plt.show()

In [18]:
# Dump all model weights into a single .npy file (np.save appends the suffix).
weights = model.get_weights()

# The weight arrays have different shapes. Letting np.save convert such a
# ragged list implicitly is deprecated and raises an error in recent NumPy
# releases, so build the 1-D object array explicitly, one slot per weight array.
weights_array = np.empty(len(weights), dtype = object)
for idx, layer_weights in enumerate(weights):
    weights_array[idx] = layer_weights

# Object arrays are stored via pickle; read back with np.load(..., allow_pickle=True).
np.save("Models/Imagenet/bitemp_B4_Numpy_Weights", weights_array)

In [19]:
# Save the whole model to a single HDF5 file.
# NOTE(review): the file name says "20epoch", but the logged run above was
# interrupted during epoch 7 — confirm which training state is being saved.
# NOTE(review): reloading presumably needs BiTemperedLogisticLoss supplied via
# `custom_objects` — confirm.
model.save('Models/Imagenet/bitemp_Model_EffNetB4_380_20epoch.h5')

In [20]:
# Save only the weights to an HDF5 file; the architecture has to be rebuilt
# (e.g. with create_model()) before they can be loaded back.
model.save_weights('Models/Imagenet/bitemp_B4_SavedWeights.hdf5')