In [None]:
#import necessary libraries
import numpy as np 
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from random import shuffle
from keras.utils  import to_categorical

In [None]:
#Split Train Folder further into Train and Validation Folders at runtime
import math
import random
import tempfile

def _copy_symlinks(files, src_dir, dst_dir):
    for i in files:
        base_file_name = os.path.basename(i)
        src_file_path = os.path.join(src_dir, base_file_name)
        dst_file_path = os.path.join(dst_dir, base_file_name)
        src_file_path = os.path.abspath(src_file_path)
        dst_file_path = os.path.abspath(dst_file_path)
        os.symlink(src_file_path, dst_file_path)

def train_valid_split(original_dir, validation_split=0.1, seed=None):
    """Split a folder-per-class dataset into train and validation trees.

    Every sub-directory found below ``original_dir`` is mirrored (by base
    name) under ``<tmp>/train`` and ``<tmp>/validation`` inside a fresh
    temporary directory. The files of each sub-directory are shuffled and
    distributed between the two trees as symlinks, so no data is copied.

    Args:
        original_dir: Existing directory whose sub-directories hold the files.
        validation_split: Fraction (0..1) of each sub-directory's files that
            goes to the validation tree; the count is rounded up.
        seed: Optional seed for the global ``random`` generator. With a seed
            the split is reproducible, because file names are sorted before
            being shuffled.

    Returns:
        Tuple ``(tmp_dir, train_dir, valid_dir)`` where ``tmp_dir`` is the
        ``tempfile.TemporaryDirectory`` handle and the other two are paths.
        The caller must keep ``tmp_dir`` referenced while the trees are in
        use: the directory is deleted when the handle is cleaned up or
        garbage-collected.

    Raises:
        NotADirectoryError: If ``original_dir`` is not an existing directory.
        ValueError: If ``validation_split`` is outside the range [0, 1].
    """
    if seed is not None:
        random.seed(seed)
    if not os.path.isdir(original_dir):
        raise NotADirectoryError(
            "{!r} is not an existing directory".format(original_dir))
    if not 0 <= validation_split <= 1:
        raise ValueError(
            "validation_split must be between 0 and 1, got {!r}".format(validation_split))

    tmp_dir = tempfile.TemporaryDirectory()
    train_dir = os.path.join(tmp_dir.name, 'train')
    valid_dir = os.path.join(tmp_dir.name, 'validation')

    try:
        for root, _dirs, files in os.walk(original_dir):
            if root == original_dir:
                # Files lying directly in the top-level folder belong to no class.
                continue
            # NOTE: sub-directories are keyed by base name only, so nested
            # folders that share a name end up in the same output folder.
            sub_dir_name = os.path.basename(root)
            train_sub_dir_path = os.path.join(train_dir, sub_dir_name)
            valid_sub_dir_path = os.path.join(valid_dir, sub_dir_name)
            os.makedirs(train_sub_dir_path, exist_ok=True)
            os.makedirs(valid_sub_dir_path, exist_ok=True)

            # os.walk lists files in arbitrary filesystem order; sorting first
            # makes the seeded shuffle produce the same split on every run.
            files = sorted(files)
            random.shuffle(files)
            valid_idx = math.ceil(validation_split * len(files))
            _copy_symlinks(files[valid_idx:], root, train_sub_dir_path)
            _copy_symlinks(files[:valid_idx], root, valid_sub_dir_path)
    except BaseException:
        # Do not leave a half-built temporary tree behind on failure.
        tmp_dir.cleanup()
        raise
    return tmp_dir, train_dir, valid_dir

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

original_dir = '../input/complete-fruit-veg-dataset-v1/train'
# batch size indicates how many images are fed to the model at the same time
batch_size = 32
# validation data should be 10% of training data
validation_split = 0.1

# Split the train folder into train and validation folders. The split consists of
# symlinks inside a temporary directory; base_dir is the TemporaryDirectory handle and
# must stay referenced while the generators are in use, because the directory is
# removed as soon as the handle is cleaned up or garbage-collected.
base_dir, train_dir, val_dir = train_valid_split(original_dir, validation_split, seed=1)

# ImageDataGenerator normalises the images (rescale to [0, 1]) and, for the training
# data only, applies random augmentations.
train_datagen = ImageDataGenerator(
    rescale = (1./255),
    rotation_range=90,
    width_shift_range=.2,
    zoom_range=0.2,
    height_shift_range=.2,
    brightness_range=(0.9,1.5),
    horizontal_flip=True)

# Test and validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale = (1./255))
validation_datagen = ImageDataGenerator(rescale = (1./255))

# flow_from_directory supplies images from a directory and assigns labels to images according
# to the folder in which they are present, which removes the need for an explicit label .csv.
# target_size resizes the images to the given dimensions; the input layer of the model accepts
# images of 224 * 224.
# class_mode='categorical' yields one-hot encoded labels (multi-class problem).
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=batch_size,   # use the constant defined above instead of a second literal
    class_mode='categorical',
    shuffle=True)

# Validation data is not shuffled: the order does not matter for evaluation, and a fixed
# order keeps the per-epoch validation metrics comparable with each other.
validation_generator = validation_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)


In [None]:
# import the libraries required for building, training and testing the model
import keras 
from keras.layers import Conv2D,MaxPooling2D , Activation, Flatten
from keras.models import Sequential
from tensorflow.keras.layers import *

In [None]:
# Build a VGG16-style network: five blocks of stacked 3x3 convolutions, each followed by
# 2x2 max pooling, then two fully connected layers and a softmax output layer.
model = Sequential()  # sequential model: layers are stacked one after another

# (number of filters, number of stacked convolution layers) for each block
conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

is_first_layer = True
for n_filters, n_convs in conv_blocks:
    for _ in range(n_convs):
        # only the very first layer declares the input shape (224 x 224 RGB images)
        extra_args = {"input_shape": (224, 224, 3)} if is_first_layer else {}
        model.add(Conv2D(filters=n_filters, kernel_size=(3,3), padding="same",
                         activation="relu", **extra_args))
        is_first_layer = False
    # halve the spatial resolution after every block
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))

# flatten the feature maps into a single vector so that it can be fed to the dense layers
model.add(Flatten())
# fully connected layers
model.add(Dense(units=4096, activation="relu"))
model.add(Dense(units=4096, activation="relu"))
# The output layer predicts the label of the image. Its width is taken from the training
# generator so that it always matches the number of class folders (and therefore the
# width of the one-hot labels) instead of relying on a hard-coded count.
model.add(Dense(units=train_generator.num_classes, activation="softmax"))



In [None]:
# Compile the model.
# The generators produce one-hot labels (class_mode='categorical') and the network ends in a
# softmax layer, so the matching loss is categorical cross-entropy. Binary cross-entropy would
# score each output unit independently and can make Keras report an inflated accuracy.
# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])
model.summary()

In [None]:
#using callback functions offered by keras to avoid overfitting and to save the best model during training
from keras.callbacks import ModelCheckpoint, EarlyStopping

# file that receives the best model seen during training
filepath = ('./rottenvsfresh_single_multi_classifier.h5')
# Stop training when the validation loss has not improved for 4 consecutive epochs.
# restore_best_weights puts the weights of the best epoch back into the model when training
# is stopped early; without it the model keeps the weights of the last (worse) epoch.
earlyStopping = EarlyStopping(monitor='val_loss', verbose=0, mode='min', patience=4,
                              restore_best_weights=True)
# save the model whenever the validation loss reaches a new minimum
mcp_save = ModelCheckpoint(filepath, save_best_only=True, monitor='val_loss', mode='min')

In [None]:
# Train the model on the training data, using the validation data to detect overfitting.
# Use true division before rounding up so the last, partially filled batch is included;
# `math.ceil(n // batch_size)` rounds down first and silently drops those samples.
steps_per_epoch = math.ceil(train_generator.n / train_generator.batch_size)
validation_steps = math.ceil(validation_generator.n / validation_generator.batch_size)

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    epochs=50,
    use_multiprocessing=True,
    workers=6,
    callbacks=[earlyStopping, mcp_save])

# Save the weights the model holds at the end of training under their own file name.
# Writing them to the ModelCheckpoint path would overwrite the best checkpoint with the
# weights of the last epoch.
model.save_weights("./rottenvsfresh_single_multi_classifier_last_epoch_weights.h5")



In [None]:
import matplotlib.pyplot as plt

# Accuracy curves: training vs. validation accuracy for every epoch
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history.epoch, history.history['val_accuracy'], '-b', label='Validation Accuracy')
ax_acc.plot(history.epoch, history.history['accuracy'], '-g', label='Training Accuracy')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()
plt.show()

# Loss curves: training vs. validation loss for every epoch
epochs = history.epoch
loss_train = history.history['loss']
loss_val = history.history['val_loss']
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss_train, 'g', label='Training loss')
ax_loss.plot(epochs, loss_val, 'b', label='validation loss')
ax_loss.set_title('Training and Validation loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()
plt.show()

In [None]:
# For evaluation on the test data, first load the weights saved during training
model.load_weights("./rottenvsfresh_single_multi_classifier.h5")

# Read the test images. shuffle=False keeps them in directory order and batch_size=1
# means every step evaluates exactly one image.
testing_generator = test_datagen.flow_from_directory(
    '../input/complete-fruit-veg-dataset-v1/test',
    target_size=(224, 224),
    batch_size= 1,
    class_mode='categorical',
    shuffle=False)

STEP_SIZE_TEST=testing_generator.n    # one step per test image

# Finally evaluate the model on the test data. Model.evaluate accepts generators directly;
# evaluate_generator is deprecated. `pred` holds the loss followed by the compiled metrics.
pred=model.evaluate(testing_generator,
                    steps=STEP_SIZE_TEST,
                    verbose=1)

# report each returned value next to its metric name
for metric_name, metric_value in zip(model.metrics_names, pred):
    print("{}: {:.4f}".format(metric_name, metric_value))