In [None]:
#import necessary libraries
import numpy as np 
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from random import shuffle
from keras.utils  import to_categorical
from sklearn.model_selection import KFold, StratifiedKFold

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Folder that holds every image of the dataset
original_dir = "../input/total-cucumber-multi/Total_Cucumber_multi/Total_Cucumber"

# CSV with one row per image; its 'name' and 'label' columns are used below
total_data = pd.read_csv("../input/total-cucumber-multi/cucumber_3.csv")
names = total_data[["name"]]    # image filenames (kept as a one-column DataFrame)
labels = total_data[["label"]]  # class labels (kept as a one-column DataFrame)

# Per-fold bookkeeping, filled in by the training loop further down
VAL_ACCURACIES = []  # validation accuracy of each fold
VAL_LOSSES = []      # validation loss of each fold
fold_count = 1       # 1-based index of the fold currently being trained

# Three shuffled, stratified folds so each fold keeps the class proportions;
# the fixed seed makes the split reproducible.
stratkf = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)

# Validation images are only rescaled to [0, 1]; no augmentation is applied so
# they stand in for unseen real-world data.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Training images get the same rescaling plus random augmentations.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    brightness_range=(0.9, 1.5),
    horizontal_flip=True,
)




In [None]:
# Build the checkpoint file name for a fold so each fold's model is stored separately
def getModelName(i):
    """Return the HDF5 file name used for fold ``i``, e.g. ``model_1.h5``."""
    return f"model_{i!s}.h5"

In [None]:
# import the libraries required for building, training and testing the model
import keras 
from keras.layers import Dense,Dropout, Conv2D,MaxPooling2D , Activation, Flatten
from keras.models import Sequential
from tensorflow.keras.layers import *
import math
import tensorflow as tf

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

def _build_model(input_shape=(224, 224, 3), learning_rate=0.00001):
    """Build and compile a fresh, untrained binary classifier for one fold.

    The network is a VGG16-style stack: five blocks of 3x3 'same'-padded ReLU
    convolutions (2, 2, 3, 3 and 3 layers with 64, 128, 256, 512 and 512
    filters), each followed by 2x2 max pooling, then two 4096-unit ReLU dense
    layers and a single sigmoid output unit.

    Args:
        input_shape: (height, width, channels) of the input images.
        learning_rate: step size passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss and
        reporting accuracy.
    """
    # (filters, number of stacked convolutions) for each convolutional block
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_first_layer = True
    for filters, conv_count in conv_blocks:
        for _ in range(conv_count):
            # only the very first layer has to declare the input shape
            extra = {"input_shape": input_shape} if is_first_layer else {}
            model.add(Conv2D(filters=filters, kernel_size=(3, 3), padding="same",
                             activation="relu", **extra))
            is_first_layer = False
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # flatten the feature maps into one vector so they can be fed to the dense layers
    model.add(Flatten())
    model.add(Dense(units=4096, activation="relu"))
    model.add(Dense(units=4096, activation="relu"))
    # single sigmoid unit: outputs the probability of the positive class
    model.add(Dense(units=1, activation="sigmoid"))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Train and evaluate one model per stratified fold
for tr_index, val_index in stratkf.split(names, labels):
    training_data = total_data.iloc[tr_index]
    validation_data = total_data.iloc[val_index]

    train_generator = train_datagen.flow_from_dataframe(
        training_data,
        directory=original_dir,
        target_size=(224, 224),
        x_col="name",
        y_col="label",
        batch_size=32,
        class_mode='binary',
        shuffle=True)

    # Validation data is not shuffled: order is irrelevant for the metrics and a
    # fixed order keeps every validation pass over the same images reproducible.
    validation_generator = validation_datagen.flow_from_dataframe(
        validation_data,
        directory=original_dir,
        target_size=(224, 224),
        x_col="name",
        y_col="label",
        batch_size=32,
        class_mode='binary',
        shuffle=False)

    # A new model per fold so no weights leak from one fold into the next
    model = _build_model()
    model.summary()

    checkpoint_path = './' + getModelName(fold_count)

    # Callbacks: stop once val_loss has not improved for 4 consecutive epochs,
    # and keep only the checkpoint with the lowest val_loss.
    earlyStopping = EarlyStopping(monitor='val_loss', verbose=0, mode='min', patience=4)
    mcp_save = ModelCheckpoint(checkpoint_path, save_best_only=True, monitor='val_loss', mode='min')

    # Round UP with true division so the last, partially filled batch is included.
    # (Floor division followed by ceil would silently drop it and would yield
    # zero steps for a split smaller than one batch.)
    train_steps = math.ceil(train_generator.n / train_generator.batch_size)
    val_steps = math.ceil(validation_generator.n / validation_generator.batch_size)

    # `fit` accepts generators directly; `fit_generator` is deprecated.
    # `workers` / `use_multiprocessing` are the TF 2.x data-loading options.
    history = model.fit(train_generator,
                        use_multiprocessing=True,
                        workers=6,
                        steps_per_epoch=train_steps,
                        epochs=50,
                        validation_data=validation_generator,
                        validation_steps=val_steps,
                        callbacks=[earlyStopping, mcp_save])

    # Restore the best checkpoint (lowest val_loss) before the final evaluation
    model.load_weights(checkpoint_path)

    # Evaluate the restored model on the complete validation split of this fold
    result = model.evaluate(validation_generator, steps=val_steps)

    # Map metric names to their values, e.g. {'loss': ..., 'accuracy': ...}
    result = dict(zip(model.metrics_names, result))

    VAL_ACCURACIES.append(result['accuracy'])  # record this fold's accuracy
    VAL_LOSSES.append(result['loss'])          # record this fold's loss
    tf.keras.backend.clear_session()  # release the graph/state of this fold's model
    fold_count += 1  # move on to the next fold

In [None]:
# Average validation accuracy and loss over all evaluated folds
avg_acc = sum(VAL_ACCURACIES)/len(VAL_ACCURACIES)
avg_loss = sum(VAL_LOSSES)/len(VAL_LOSSES)

# Display validation accuracy and loss of each fold. Iterating over the recorded
# results (instead of a hard-coded range(3)) keeps this cell correct when the
# number of folds changes or the training loop was stopped early.
for i, (fold_acc, fold_loss) in enumerate(zip(VAL_ACCURACIES, VAL_LOSSES)):
    print("Validation Accuracy for  Fold = "+ str(i)+" is "+str(fold_acc*100) +"  and Loss = "+str(fold_loss))
  


In [None]:
# Summary over all folds: mean accuracy (as a percentage) and mean loss
print(f"Average Accuracy = {avg_acc * 100!s}  and Average Loss = {avg_loss!s}")