### Import Libraries

In [0]:
from zipfile import ZipFile
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
from cv2 import resize, imread
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [0]:
# Install the efficientnet package straight from its GitHub repository;
# the `efficientnet.tfkeras` import in a later cell depends on it.
!pip install -U git+https://github.com/qubvel/efficientnet

In [0]:
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization, Concatenate
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from efficientnet.tfkeras import EfficientNetB4

In [0]:
# Colab only: mount Google Drive at /content/drive so that the archive and
# saved models under '/content/drive/My Drive/Plant' can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

### Extracting Zip

In [0]:
# Unpack the competition archive stored on Drive into the current working
# directory; the context manager closes the archive once extraction finishes.
with ZipFile('/content/drive/My Drive/Plant/plant-pathology-2020-fgvc7.zip') as archive:
    print('Extracting')
    archive.extractall()
    print('Done!!')

In [0]:
# Load the training table from the working directory (presumably unpacked by
# the extraction cell above — confirm). It holds an image_id column plus the
# label columns used below.
train_csv = pd.read_csv('train.csv')

In [0]:
# Keep only the label columns: image_id is an identifier, not a target.
train_csv = train_csv.drop(columns='image_id')

In [0]:
# Index labels of the rows whose 'multiple_diseases' column equals 1.
# Masking the index directly avoids materialising the filtered DataFrame.
mult_diseases_index = train_csv.index[(train_csv['multiple_diseases'] == 1).values]

In [0]:
# Label matrix as a NumPy array: one row per training sample, one column per
# remaining train.csv column.
# NOTE(review): presumably one-hot over 4 classes (the model head ends in a
# 4-way softmax) — confirm against train.csv.
labels = train_csv.values

In [0]:
def _load_resized(image_dir, file_name, img_size):
    """Read one image file from image_dir and resize it to img_size x img_size."""
    file_path = os.path.join(image_dir, file_name)
    pixels = imread(file_path)
    if pixels is None:
        # cv2.imread reports an unreadable or corrupt file by returning None
        # rather than raising; fail here so the offending path is visible.
        raise IOError('Could not read image: {}'.format(file_path))
    return resize(pixels, (img_size, img_size))


def extract_images(img_size):
    """Load all train and test images from ./images as resized arrays.

    File names are expected to look like ``Train_<n>.<ext>`` or
    ``Test_<n>.<ext>``. Any other entry in the directory (hidden files,
    unrelated files, names without a numeric suffix) is ignored instead of
    being treated as a test image.

    Images of each split are returned ordered by their numeric suffix,
    starting at 0, so position ``i`` in the train array is ``Train_i``.

    NOTE(review): cv2.imread yields channels in BGR order and the arrays are
    returned as read, without conversion to RGB.

    Args:
        img_size: Side length in pixels; every image is resized to
            ``(img_size, img_size)``.

    Returns:
        Tuple ``(train_images, test_images)`` of NumPy arrays, each stacking
        the resized images of one split along axis 0.

    Raises:
        KeyError: If the numeric suffixes of a split do not form the
            contiguous range ``0 .. count - 1``.
        IOError: If an image file cannot be decoded.
    """
    image_dir = os.path.join(os.curdir, 'images')
    files_by_split = {'Train': {}, 'Test': {}}

    for file_name in os.listdir(image_dir):
        parts = file_name.split('_')
        if len(parts) < 2 or parts[0] not in files_by_split:
            continue
        try:
            number = int(parts[1].split('.')[0])
        except ValueError:
            # Suffix is not a number, so this is not a dataset image.
            continue
        files_by_split[parts[0]][number] = file_name

    def load_split(indexed_files):
        # Walk 0..count-1 explicitly so array order follows the file numbering.
        return [
            _load_resized(image_dir, indexed_files[idx], img_size)
            for idx in tqdm(range(len(indexed_files)))
        ]

    train_images = load_split(files_by_split['Train'])
    test_images = load_split(files_by_split['Test'])

    return np.array(train_images), np.array(test_images)

In [0]:
# Read and resize every image to 380x380.
# NOTE(review): 380 presumably matches EfficientNetB4's default input size — confirm.
train_images, test_images = extract_images(380)

In [0]:
# Cache the resized arrays on disk; later cells reload them with np.load
# instead of decoding every image again.
np.save('train_images_380.npy', train_images)
np.save('test_images_380.npy', test_images)

In [0]:
# Reload the cached training images (lets a fresh session skip extract_images).
train_images = np.load('train_images_380.npy')

##  Rescaling

In [0]:
# Generator whose only transformation is multiplying pixel values by 1/255.
idg = ImageDataGenerator(rescale= 1/255)

In [0]:
# Pull the whole training set through the rescaling generator as one batch.
# batch_size equals the number of samples and shuffle is off, so image/label
# order is preserved. Later cells read x[0] as the images and x[1] as the labels.
x = next(idg.flow(train_images, labels, shuffle = False, batch_size=train_images.shape[0]))

In [0]:
# Drop the unscaled array now that the rescaled copy lives in `x`.
# NOTE(review): later cells (multiple_diseases sampling, x_concat) refer to
# `train_images` again, so it has to be in memory (e.g. reloaded from
# train_images_380.npy) before they run.
del train_images

### Extra images

In [0]:
# Augmentation settings used to synthesise extra samples: random rotation,
# zoom and shear plus random horizontal/vertical flips, emitted as uint8.
augmentation_options = dict(
    rotation_range=90,
    zoom_range=0.4,
    shear_range=0.5,
    horizontal_flip=True,
    vertical_flip=True,
    dtype='uint8',
)
new_idg = ImageDataGenerator(**augmentation_options)

In [0]:
# Collect the images and labels of the 'multiple_diseases' samples.
#
# `train_images` is deleted once the rescaled copy has been built, so a
# top-to-bottom run reached this cell without it and failed with a NameError.
# Restore the unscaled array from the cached .npy when it is not in memory.
try:
    train_images
except NameError:
    train_images = np.load('train_images_380.npy')

# Select every flagged row in one indexing step instead of appending in a loop;
# this also yields correctly shaped (empty) arrays when no row is flagged.
mult_rows = np.asarray(mult_diseases_index)
mult_diseases = train_images[mult_rows]
mult_diseases_label = labels[mult_rows]

In [0]:
# Draw four independently augmented batches of the multiple_diseases samples.
#
# The concatenation cells that follow use x_new, x_new_1, x_new_2 and x_new_3,
# but only x_new_3 was assigned in this notebook, so they raised a NameError.
# NOTE(review): presumably the other three came from re-running this same
# statement under different names — confirm.
#
# Each batch is an (images, labels) tuple; batch_size=450 is the upper bound on
# the number of samples per batch.
x_new, x_new_1, x_new_2, x_new_3 = (
    next(new_idg.flow(mult_diseases, mult_diseases_label, batch_size=450))
    for _ in range(4)
)

In [0]:
# Stack the original images with the image part (element 0) of each augmented
# batch along the sample axis.
# NOTE(review): no later cell shown here reads x_concat; the train/dev split
# uses `x` — confirm this is intended.
x_concat = np.concatenate(
    [train_images] + [batch[0] for batch in (x_new, x_new_1, x_new_2, x_new_3)],
    axis=0,
)

In [0]:
# Stack the original labels with the label part (element 1) of each augmented
# batch, in the same order as the image concatenation.
y_concat = np.concatenate(
    [labels] + [batch[1] for batch in (x_new, x_new_1, x_new_2, x_new_3)],
    axis=0,
)

## Splitting Training and Dev data

In [0]:
# Hold out 20% of the rescaled samples as the dev set; the fixed random_state
# makes the split reproducible. x[0] are the images, x[1] the labels.
x_train, x_dev, y_train, y_dev = train_test_split(x[0], x[1], test_size = 0.2, random_state = 101)

## Model

In [0]:
# Instantiate EfficientNetB4 with the library defaults.
# NOTE(review): the defaults presumably mean ImageNet weights with the
# classification top included — confirm in the efficientnet package.
efficientnet = EfficientNetB4()

In [0]:
# Load a previously saved NASNet-based model from Drive.
nasnet = load_model('/content/drive/My Drive/Plant/nasnet.h5')

In [0]:
# Load a previously saved Xception-based model from Drive.
xception = load_model('/content/drive/My Drive/Plant/xception.h5')

In [0]:
def get_layers_pretrain(pretrained_model, trainable=True, layer_name='top_dropout'):
    """Truncate a pretrained model at a named layer.

    Args:
        pretrained_model: Keras model to cut.
        trainable: When falsy, every layer of the returned model is frozen.
        layer_name: Name of the layer whose output becomes the output of the
            returned model. Defaults to 'top_dropout', the layer this notebook
            cuts EfficientNetB4 at.

    Returns:
        A Model mapping ``pretrained_model.input`` to the chosen layer's output.
    """
    feature_extractor = Model(
        inputs=pretrained_model.input,
        outputs=pretrained_model.get_layer(layer_name).output,
    )

    if not trainable:
        # The layer objects are shared with pretrained_model, so freezing them
        # here freezes them in the original model as well.
        for layer in feature_extractor.layers:
            layer.trainable = False

    return feature_extractor

In [0]:
# EfficientNetB4 cut at its 'top_dropout' layer, with all layers frozen
# (trainable=False).
pretrained_model = get_layers_pretrain(efficientnet, False)

In [0]:
# Print the layer-by-layer summary of the truncated model.
pretrained_model.summary()

In [0]:
def ensemble_model(model_1, model_2, trainable=True,
                   layer_name_1='global_average_pooling2d',
                   layer_name_2='avg_pool'):
    """Build a two-input, two-output model from the pooled features of two models.

    The returned model takes the inputs of both models and returns, as two
    separate outputs, the activations of one named layer from each of them.
    The outputs are not merged here.

    Args:
        model_1: First Keras model.
        model_2: Second Keras model.
        trainable: When falsy, every layer of the returned model is frozen.
        layer_name_1: Layer of model_1 to tap. Defaults to
            'global_average_pooling2d'.
        layer_name_2: Layer of model_2 to tap. Defaults to 'avg_pool'.

    Returns:
        A Model with inputs ``[model_1.input, model_2.input]`` and the two
        tapped layer outputs as its outputs.
    """
    tapped_outputs = [
        model_1.get_layer(layer_name_1).output,
        model_2.get_layer(layer_name_2).output,
    ]
    combined = Model(inputs=[model_1.input, model_2.input], outputs=tapped_outputs)

    if not trainable:
        # Layers are shared with model_1/model_2, so this freezes them there too.
        for layer in combined.layers:
            layer.trainable = False

    return combined

In [0]:
# Frozen two-branch feature extractor built from the NASNet and Xception models.
# NOTE(review): this rebinds the name `ensemble_model` from the builder function
# to the model it returns, so the function can no longer be called afterwards
# and re-running this cell fails. Consider a different variable name.
ensemble_model = ensemble_model(nasnet, xception, False)

In [0]:
# Remove the `pretrained_model` name from the notebook namespace. The
# `efficientnet` object it was built from is untouched, so any later cell that
# needs the truncated model can rebuild it with get_layers_pretrain.
del pretrained_model

In [0]:
def get_final_model(model, num_classes=4):
    """Attach a fully connected softmax classification head to a model.

    The head is a stack of Dense blocks of 1024, 512, 256, 128, 64, 32 and 16
    units. Each block is a ReLU Dense layer with 'glorot_normal'
    initialisation, followed by Dropout(0.2) for the 1024, 512, 32 and 16 unit
    blocks, followed by BatchNormalization. A final softmax Dense layer
    produces the class probabilities.

    Args:
        model: Keras model providing the features. Its ``output`` must be a
            single tensor that Dense can be applied to.
        num_classes: Width of the softmax output layer. Defaults to 4.

    Returns:
        A Model from ``model.input`` to the softmax output.
    """
    # (units, followed_by_dropout) for each hidden block, in order. Layers are
    # created in the same sequence as before, so auto-generated layer names
    # are unchanged.
    hidden_blocks = [
        (1024, True),
        (512, True),
        (256, False),
        (128, False),
        (64, False),
        (32, True),
        (16, True),
    ]

    h = model.output
    for units, followed_by_dropout in hidden_blocks:
        h = Dense(units, activation='relu', kernel_initializer='glorot_normal')(h)
        if followed_by_dropout:
            h = Dropout(0.2)(h)
        h = BatchNormalization()(h)

    output_l = Dense(num_classes, activation='softmax')(h)

    return Model(inputs=model.input, outputs=output_l)

In [0]:
# Build the classifier on top of the frozen, truncated EfficientNetB4.
# The truncated model is rebuilt here rather than read from `pretrained_model`:
# an earlier cell deletes that name, which made this call fail with a NameError
# in a top-to-bottom run.
model = get_final_model(get_layers_pretrain(efficientnet, False))

In [0]:
# Print the layer-by-layer summary of the full classifier.
model.summary()

In [0]:
# `learning_rate` is the supported keyword for the optimizer step size; `lr` is
# a deprecated alias that newer Keras releases reject.
# The metric is registered as 'acc', so the training history exposes the
# 'acc' and 'val_acc' keys that the plotting helper reads.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['acc'])

In [0]:
# All three callbacks watch the validation loss, where lower is better.
val_loss_watch = dict(monitor='val_loss', mode='min')

# Stop training after 5 epochs without improvement.
early_stopping = EarlyStopping(patience=5, **val_loss_watch)
# Multiply the learning rate by 0.6 after 2 stagnant epochs, never below 1e-5.
reduce_lr = ReduceLROnPlateau(factor=0.6, patience=2, min_lr=0.00001, **val_loss_watch)
# Keep only the best model seen so far under the path 'checkpoint'.
checkpoint = ModelCheckpoint('checkpoint', save_best_only=True, verbose=0, **val_loss_watch)

In [0]:
# Train for at most 50 epochs, validating on the dev split after each epoch;
# the callbacks may stop training early, lower the learning rate and save the
# best model. `history` keeps the per-epoch metrics for plotting.
history = model.fit(x_train, y_train, epochs= 50, validation_data = (x_dev, y_dev),
                    verbose=1, callbacks=[early_stopping, reduce_lr, checkpoint])

### Loss and Accuracy Plot

In [0]:
def loss_acc_plot(history, accuracy=False):
    """Plot training vs. validation curves from a Keras History object.

    Always shows one figure with 'loss' and 'val_loss'. When ``accuracy`` is
    truthy, a second figure shows the training and validation accuracy.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values (as returned by ``model.fit``).
        accuracy: Whether to draw the accuracy figure as well.

    Raises:
        KeyError: If a required metric is missing from ``history.history``.
    """
    data = pd.DataFrame(history.history)

    plt.title('Training Loss vs Validation Loss')
    plt.plot(data['loss'], c='b', label='loss')
    plt.plot(data['val_loss'], c='orange', label='val_loss')
    plt.legend()
    plt.show()

    if accuracy:
        # Keras names the history columns after the metric string passed to
        # compile(): 'acc' gives acc/val_acc, 'accuracy' gives
        # accuracy/val_accuracy. Accept either spelling.
        acc_key = 'acc' if 'acc' in data else 'accuracy'

        plt.title('Training Accuracy vs Validation Accuracy')
        plt.plot(data[acc_key], c='b', label='accuracy')
        plt.plot(data['val_' + acc_key], c='orange', label='val_accuracy')
        plt.legend()
        plt.show()


In [0]:
# Show both the loss and the accuracy curves of the training run.
loss_acc_plot(history, accuracy= True)

### Prediction

In [0]:
# Reload the cached, resized test images.
test_images = np.load('test_images_380.npy')

In [0]:
# Apply the same 1/255 rescaling used for training to the whole test set in a
# single, unshuffled batch. No labels are passed, so x_test is what is handed
# directly to predict().
x_test = next(idg.flow(test_images, shuffle = False, batch_size=test_images.shape[0]))

In [0]:
# Reload the model that ModelCheckpoint stored under 'checkpoint'
# (save_best_only=True, monitored on val_loss).
model_best = load_model('checkpoint')

In [0]:
# Softmax outputs of the best checkpoint for every test image.
prediction = model_best.predict(x_test)

In [0]:
# Wrap the predictions in a DataFrame; no column names are given, so the
# columns are labelled by integer position.
final = pd.DataFrame(prediction)

In [0]:
# Keep two decimal places per predicted probability.
final = final.round(2)

In [0]:
# Write the rounded predictions to disk; the row index and the integer column
# labels are written as well (pandas defaults).
# NOTE(review): the columns presumably follow the train.csv label column
# order — confirm before using this file as a submission.
final.to_csv('prediction.csv')