In [78]:
import pandas as pd
import numpy as np
import pylab as plt
import glob
import os
import random
import keras
from keras import backend as K
from keras import layers
from keras import metrics
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras.models import Sequential, Model
from keras.utils import plot_model
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import cv2
from tqdm import tqdm
from keras.callbacks import EarlyStopping
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from collections import Counter
from sklearn.model_selection import train_test_split
import sklearn
from collections import Counter
from pandas import DataFrame
import gc
import tensorflow as tf
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight

In [79]:
train_csv_path = "../input/cassava-leaf-disease-classification/train.csv"
label_json_path = "../input/cassava-leaf-disease-classification/label_num_to_disease_map.json"
images_dir_path = "../input/cassava-leaf-disease-classification/train_images"
test_csv_path = "../input/cassava-leaf-disease-classification/sample_submission.csv"
train_csv['label'] = train_csv['label'].astype('string')

x_train, x_val, y_train, y_val = train_test_split(train_csv['image_id'], train_csv['label'], test_size = 0.1, random_state = 27, stratify=train_csv['label'])
y_train_onehot, y_val_onehot, d_class_weights = encode_target_variable(y_train,y_val)
#train df
df_train = pd.DataFrame(columns=['image_id','label'])
df_train['image_id'] = x_train
df_train['label'] = y_train

#validation df
df_val = pd.DataFrame(columns=['image_id','label'])
df_val['image_id'] = x_val
df_val['label'] = y_val

df_train.reset_index(drop=True, inplace=True)
df_val.reset_index(drop=True, inplace=True)

label_class = pd.read_json(label_json_path, orient='index')
label_class = label_class.values.flatten().tolist()
IMG_SIZE = 512
BATCH_SIZE = 24
EPOCHS = 15
CHANNELS = 3

18281    0
14735    0
3906     1
11276    3
        ..
1570     1
20304    3
16067    1
18894    2
13547    3
Name: label, Length: 19257, dtype: string as keyword args. From version 0.25 passing these as positional arguments will result in an error


In [51]:
print("Label names :")
for i, label in enumerate(label_class):
    print(f" {i}. {label}")

Label names :
 0. Cassava Bacterial Blight (CBB)
 1. Cassava Brown Streak Disease (CBSD)
 2. Cassava Green Mottle (CGM)
 3. Cassava Mosaic Disease (CMD)
 4. Healthy


In [93]:
train_gen = ImageDataGenerator(
                                rotation_range=270,
                                width_shift_range=0.2,
                                height_shift_range=0.2,
                                brightness_range=[0.1,0.9],
                                shear_range=25,
                                zoom_range=0.3,
                                channel_shift_range=0.1,
                                horizontal_flip=True,
                                vertical_flip=True,
                                rescale=1/255,
                                validation_split=0.2
                               )
                                    
    
valid_gen = ImageDataGenerator(rescale=1/255,
                               validation_split = 0.2
                              )

test_gen=ImageDataGenerator(rescale=1./255.)

In [95]:
train_generator = train_gen.flow_from_dataframe(
                            dataframe=df_train,
                            directory = images_dir_path,
                            x_col = "image_id",
                            y_col = "label",
                            target_size = (IMG_SIZE, IMG_SIZE),
                            class_mode = "categorical",
                            batch_size = BATCH_SIZE,
                            shuffle = True,
                            subset = "training"

)

valid_generator = valid_gen.flow_from_dataframe(
                            dataframe=df_val,
                            directory = images_dir_path,
                            x_col = "image_id",
                            y_col = "label",
                            target_size = (IMG_SIZE, IMG_SIZE),
                            class_mode = "categorical",
                            batch_size = BATCH_SIZE,
                            shuffle = False,
                            subset = "validation"
)

predict_generator = test_gen.flow_from_dataframe(
                            dataframe=df_val,
                            directory = images_dir_path,
                            x_col = "image_id",
                            y_col = "label",
                            target_size = (IMG_SIZE, IMG_SIZE),
                            class_mode = None,
                            batch_size = BATCH_SIZE,
                            shuffle = False)

Found 15406 validated image filenames belonging to 5 classes.
Found 428 validated image filenames belonging to 5 classes.
Found 2140 validated image filenames.


In [82]:
#Encode target variables
def encode_target_variable(y_train,y_val):

    class_weights = compute_class_weight('balanced', np.unique(y_train), y_train)
    d_class_weights = dict(enumerate(class_weights))
    onehot_encoder = OneHotEncoder(sparse=False)
    y_train = np.array(y_train)
    y_val = np.array(y_val)
    y_train = y_train.reshape(-1,1)
    y_val = y_val.reshape(-1,1)
    y_train_onehot = onehot_encoder.fit_transform(y_train)
    y_val_onehot = onehot_encoder.fit_transform(y_val)
    return y_train_onehot, y_val_onehot, d_class_weights

In [83]:
def create_model_0(input_shape):
    input_img = Input(shape=input_shape)  
    x = Conv2D(16, (5, 5), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(16, (5, 5), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(16, (5, 5), activation='relu', padding='same')(x)
    x = Dropout(0.3)(x)
    y = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    y = MaxPooling2D((2, 2), padding='same')(y)
    y = Conv2D(16, (3, 3), activation='relu', padding='same')(y)
    y = MaxPooling2D((2, 2), padding='same')(y)
    y = Conv2D(16, (3, 3), activation='relu', padding='same')(y)
    y = Dropout(0.4)(y)
    z = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    z = MaxPooling2D((2, 2), padding='same')(z)
    z = Conv2D(16, (3, 3), activation='relu', padding='same')(z)
    z = MaxPooling2D((2, 2), padding='same')(z)
    z = Conv2D(16, (3, 3), activation='relu', padding='same')(z)

    #out = layers.concatenate([x, y, z])
    out = layers.Flatten()(z)
    out = Dense(16, activation='relu')(out)
    out = Dense(5, activation='softmax')(out)
    
    model_f = Model(inputs=[input_img], outputs=[out])
    model_f.summary()
    return model_f

In [84]:
def create_model_1(input_shape):
    weights_path = '../input/weightsclimbers/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
    resnet50 = ResNet50(weights = None ,include_top=False, input_shape=input_shape)
    resnet50.load_weights(weights_path, by_name=True)
    resnet50.trainable = False # remove if you want to retrain resnet weights
    # resnet50.summary()
    transfer_model_1 = Sequential() 
    transfer_model_1.add(resnet50)
    transfer_model_1.add(Flatten())
    transfer_model_1.add(Dense(128, activation='relu'))
    transfer_model_1.add(Dropout(0.2))
    transfer_model_1.add(Dense(5, activation='softmax'))
    transfer_model_1.summary()
    return transfer_model_1

In [85]:
def create_model_2(x_train,x_val):
    weights_path = '../input/weightsclimbers/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
    vgg16 = VGG16(weights=None, include_top=False, input_shape=(IMG_SIZE_X, IMG_SIZE_Y, 3))
    vgg16.load_weights(weights_path, by_name=True)
    vgg16.trainable = False # remove if you want to retrain vgg16 weights
    # vgg16.summary()
    transfer_model_2 = Sequential()
    transfer_model_2.add(vgg16)
    transfer_model_2.add(Flatten())
    transfer_model_2.add(Dense(128, activation='relu'))
    transfer_model_2.add(Dropout(0.2))
    transfer_model_2.add(Dense(5, activation='softmax'))
    transfer_model_2.summary()
    x_train_new = x_train
    x_val_new = x_val
    x_train_new = keras.applications.vgg16.preprocess_input(x_train_new)
    x_val_new = keras.applications.vgg16.preprocess_input(x_val_new)
    return transfer_model_2, x_train_new, x_val_new

In [86]:
def create_model_3(x_train,x_val):
    weights_path = '../input/weightsclimbers/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
    inceptionV3 = InceptionV3(weights=None, include_top=False, input_shape=(IMG_SIZE_X, IMG_SIZE_Y, 3))
    inceptionV3.load_weights(weights_path, by_name=False)
    inceptionV3.trainable = False # remove if you want to retrain rinceptionV3 weights
    # inceptionV3.summary()
    transfer_model_3 = Sequential()
    transfer_model_3.add(inceptionV3)
    transfer_model_3.add(Flatten())
    transfer_model_3.add(Dense(128, activation='relu'))
    transfer_model_3.add(Dropout(0.2))
    transfer_model_3.add(Dense(5, activation='softmax'))
    transfer_model_3.summary()
    x_train_new = x_train
    x_val_new = x_val
    x_train_new = keras.applications.inception_v3.preprocess_input(x_train_new)
    x_val_new = keras.applications.inception_v3.preprocess_input(x_val_new)
    return transfer_model_3, x_train_new, x_val_new

In [115]:
def train_model(model, batch_size, epochs, weights):
    
    batch_size = batch_size
    epochs = epochs
    
    def scheduler(epoch, lr):
        if epoch >3 and epoch%2==0:
            return lr/1.25
        else:
            return lr
    
    callback_learning_rate = tf.keras.callbacks.LearningRateScheduler(scheduler)
    
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9), 
              metrics=['categorical_accuracy',keras.metrics.AUC(),keras.metrics.Recall()])
    
    earlyStop_callback = tf.keras.callbacks.EarlyStopping(monitor='categorical_accuracy', 
                                                          patience=2,restore_best_weights = True, min_delta = 0.01)
    
    
    
    model.fit_generator(train_generator, 
                    epochs=epochs,  # one forward/backward pass of training data
                    steps_per_epoch=25,  # number of images comprising of one epoch
                    validation_data=valid_generator, # Or validation_data=valid_generator
                    validation_steps=valid_generator.samples//valid_generator.batch_size,
                    callbacks = [earlyStop_callback, callback_learning_rate],
                       class_weight = {0:1,1:1,2:1,3:0.1,4:1})
    return model

In [116]:
################## Main function ######################

    
#Baseline model

input_shape = [IMG_SIZE,IMG_SIZE,CHANNELS]
# baseline_model = create_model_0(input_shape)
# baseline_model = train_model(baseline_model, BATCH_SIZE, EPOCHS)
y_train_onehot, y_val_onehot, class_weights = encode_target_variable(y_train,y_val)

# #Resnet50
RESNET_model = create_model_1(input_shape)
RESNET_model = train_model(RESNET_model, BATCH_SIZE, EPOCHS, class_weights)
# #vgg16
# x_train, x_val, y_train, y_val = data_preparation(training_data[2], training_data[3], new_h, new_w, channels)
# y_train_onehot, y_val_onehot, class_weights = encode_target_variable(y_train,y_val)
# del y_train, y_val
# vgg16_model, x_train_new, x_val_new = create_model_2(x_train,x_val)
# del x_train,x_val
# gc.collect()
# vgg16_model = train_model(vgg16_model, batch_size, epochs, x_train_new, x_val_new, y_train_onehot, y_val_onehot, class_weights)
# #InceptionV3
# x_train, x_val, y_train, y_val = data_preparation(training_data[4], training_data[5], new_h,new_w, channels)
# y_train_onehot, y_val_onehot, class_weights = encode_target_variable(y_train,y_val)
# del y_train, y_val
# inception_model, x_train_new, x_val_new = create_model_3(x_train,x_val)
# del x_train,x_val
# gc.collect()
# inception_model = train_model(inception_model, batch_size, epochs, x_train_new, x_val_new, y_train_onehot, y_val_onehot, class_weights)

18281    0
14735    0
3906     1
11276    3
        ..
1570     1
20304    3
16067    1
18894    2
13547    3
Name: label, Length: 19257, dtype: string as keyword args. From version 0.25 passing these as positional arguments will result in an error


Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, 16, 16, 2048)      23587712  
_________________________________________________________________
flatten_10 (Flatten)         (None, 524288)            0         
_________________________________________________________________
dense_20 (Dense)             (None, 128)               67108992  
_________________________________________________________________
dropout_10 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 5)                 645       
Total params: 90,697,349
Trainable params: 67,109,637
Non-trainable params: 23,587,712
_________________________________________________________________


AttributeError: 'list' object has no attribute 'keys'

In [112]:
keras.models.save_model(RESNET_model,'../output')

In [113]:
RESNET_model = keras.models.load_model('../output')

In [114]:
from sklearn.metrics import classification_report, confusion_matrix
Y_pred = RESNET_model.predict_generator(predict_generator, steps = predict_generator.samples//predict_generator.batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(df_val['label'].astype('int'), y_pred))
print('Classification Report')
target_names = [' 0. Cassava Bacterial Blight (CBB)', ' 1. Cassava Brown Streak Disease (CBSD)', ' 2. Cassava Green Mottle (CGM)',' 3. Cassava Mosaic Disease (CMD)','4. Healthy']
print(classification_report(df_val['label'].astype('int'), y_pred, target_names=target_names))

Confusion Matrix
[[   0    0    0  109    0]
 [   0    0    0  219    0]
 [   0    0    0  238    0]
 [   0    0    0 1316    0]
 [   0    0    0  258    0]]
Classification Report
                                         precision    recall  f1-score   support

      0. Cassava Bacterial Blight (CBB)       0.00      0.00      0.00       109
 1. Cassava Brown Streak Disease (CBSD)       0.00      0.00      0.00       219
          2. Cassava Green Mottle (CGM)       0.00      0.00      0.00       238
        3. Cassava Mosaic Disease (CMD)       0.61      1.00      0.76      1316
                             4. Healthy       0.00      0.00      0.00       258

                               accuracy                           0.61      2140
                              macro avg       0.12      0.20      0.15      2140
                           weighted avg       0.38      0.61      0.47      2140



In [106]:
df_val['label'].astype('int')

0       2
1       2
2       3
3       3
4       3
       ..
2135    3
2136    3
2137    3
2138    3
2139    2
Name: label, Length: 2140, dtype: int64

In [24]:
preds = []
direc = "../input/cassava-leaf-disease-classification/"
test_direc = direc + "test_images/"
sample_sub_csv = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')

for image in sample_sub_csv.image_id:
    img = keras.preprocessing.image.load_img('../input/cassava-leaf-disease-classification/test_images/' + image)
    img = keras.preprocessing.image.img_to_array(img)
    img = keras.preprocessing.image.smart_resize(img, (IMG_SIZE, IMG_SIZE))
    img = tf.reshape(img, (-1, IMG_SIZE, IMG_SIZE, 3))
    prediction = RESNET_model.predict(img/255)
    preds.append(np.argmax(prediction))

final_submission = pd.DataFrame({'image_id': sample_sub_csv.image_id, 'label': preds})
final_submission.to_csv('submission.csv', index=False) 