In [1]:
#cucumber 3
import numpy as np 
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from random import shuffle
from keras.utils  import to_categorical
from sklearn.model_selection import KFold, StratifiedKFold

In [2]:
def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=False):
    def eraser(input_img):
        
        if input_img.ndim == 3:
            img_h, img_w, img_c = input_img.shape
        elif input_img.ndim == 2:
            img_h, img_w = input_img.shape

        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            if input_img.ndim == 3:
                c = np.random.uniform(v_l, v_h, (h, w, img_c))
            if input_img.ndim == 2:
                c = np.random.uniform(v_l, v_h, (h, w))
        else:
            c = np.random.uniform(v_l, v_h)

        input_img[top:top + h, left:left + w] = c

        return input_img
    

    return eraser

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

original_dir ="../input/total-cucumber-multi/Total_Cucumber_multi/Total_Cucumber"
total_data = pd.read_csv('../input/total-cucumber-multi/cucumber_3.csv')
labels = total_data[['label']]
names = total_data[['name']]
                         
stratkf = StratifiedKFold(n_splits = 3, random_state = 7, shuffle = True)


VAL_ACCURACIES = []
VAL_LOSSES = []

fold_count = 1


train_datagen = ImageDataGenerator(
    rescale = (1./255),
    rotation_range=90,
    width_shift_range=.2, 
    height_shift_range=.2,
    zoom_range = 0.2,
    brightness_range=(0.9,1.5),
    horizontal_flip=True,
    preprocessing_function=get_random_eraser(v_l=0, v_h=1))

#test_datagen = ImageDataGenerator(rescale = (1./255))
validation_datagen = ImageDataGenerator(rescale = (1./255))




In [4]:
def getModelName(i):
    return 'model_'+str(i)+'.h5'

In [5]:
import keras 
from keras.layers import Dense,Dropout, Conv2D,MaxPooling2D , Activation, Flatten
from keras.models import Sequential
from tensorflow.keras.layers import *
import math
import tensorflow as tf

In [6]:
for tr_index, val_index in stratkf.split(names,labels):
    training_data = total_data.iloc[tr_index]
    validation_data = total_data.iloc[val_index]



    train_generator = train_datagen.flow_from_dataframe(
        training_data,
        directory = original_dir,
        target_size=(224, 224),
        x_col = "name",
        y_col = "label",
        batch_size= 32,
        class_mode='categorical',
        shuffle=True)

    validation_generator = validation_datagen.flow_from_dataframe(
        validation_data,
        directory = original_dir,
        target_size=(224, 224),
        x_col = "name",
        y_col = "label",
        batch_size= 32,
        class_mode='categorical',
        shuffle=True)
    
    model = Sequential()

    model.add(Conv2D(input_shape=(224,224,3),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))

    model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

    model.add(Conv2D(filters=128, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=128, kernel_size=(3,3),padding="same", activation="relu"))

    model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

    model.add(Conv2D(filters=256, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=256, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=256, kernel_size=(3,3),padding="same", activation="relu"))

    model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

    model.add(Conv2D(filters=512, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3,3),padding="same", activation="relu"))

    model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))

    model.add(Conv2D(filters=512, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3,3),padding="same", activation="relu"))
    model.add(Conv2D(filters=512, kernel_size=(3,3),padding="same", activation="relu"))

    model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))

    model.add(Flatten())

    model.add(Dense(units=4096,activation="relu"))
    model.add(Dense(units=4096,activation="relu"))

    model.add(Dense(units=3, activation="softmax"))
    model.compile(optimizer= keras.optimizers.Adam(lr=0.00001), loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()


    from keras.callbacks import ModelCheckpoint, EarlyStopping
    earlyStopping = EarlyStopping(monitor='val_loss', verbose=0, mode='min', patience = 6)
    mcp_save = ModelCheckpoint('./'+getModelName(fold_count), save_best_only=True, monitor='val_loss', mode='min')
    
    
    history = model.fit_generator(train_generator,
                use_multiprocessing=True,
                workers=6,
                steps_per_epoch=math.ceil(train_generator.n//train_generator.batch_size),
                epochs = 50,
                validation_steps=math.ceil(validation_generator.n//validation_generator.batch_size),
                callbacks=[earlyStopping, mcp_save],
                validation_data=validation_generator,)
    
    
    model.load_weights("./model_"+str(fold_count)+".h5")
    
    result = model.evaluate_generator(generator=validation_generator,
            steps=validation_generator.n//validation_generator.batch_size)
    print(result)
    
    result = dict(zip(model.metrics_names,result))

    VAL_ACCURACIES.append(result['accuracy'])
    VAL_LOSSES.append(result['loss'])
    tf.keras.backend.clear_session()
    fold_count += 1

Found 932 validated image filenames belonging to 3 classes.
Found 466 validated image filenames belonging to 3 classes.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 64)      1792      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 112, 112, 128)     147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 128)       0         
__

In [7]:
avg_acc = sum(VAL_ACCURACIES)/len(VAL_ACCURACIES)
avg_loss = sum(VAL_LOSSES)/len(VAL_LOSSES)
for i in range(3):
    print("Validation Accuracy for  Fold = "+ str(i)+" is "+str(VAL_ACCURACIES[i]*100) +"  and Loss = "+str(VAL_LOSSES[i]))
    
print("Average Accuracy = "+ str(i)+" is "+str(avg_acc*100) +"  and Average Loss = "+str(avg_loss))

Validation Accuracy for  Fold = 0 is 94.41964030265808  and Loss = 0.11942537128925323
Validation Accuracy for  Fold = 1 is 93.75  and Loss = 0.0817159041762352
Validation Accuracy for  Fold = 2 is 93.75  and Loss = 0.08898355811834335
Average Accuracy = 2 is 93.97321343421936  and Average Loss = 0.09670827786127727


In [8]:
# import matplotlib.pyplot as plt

# plt.plot(history.epoch,history.history['val_accuracy'],'-b',label='Validation Accuracy')
# plt.plot(history.epoch,history.history['accuracy'],'-g',label='Training Accuracy')

# plt.title('Training and Validation Accuracy')
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.legend()
# plt.show()

# # print(history.history['va'])
# loss_train = history.history['loss']
# loss_val = history.history['val_loss']
# epochs = history.epoch
# plt.plot(epochs, loss_train, 'g', label='Training loss')
# plt.plot(epochs, loss_val, 'b', label='validation loss')
# plt.title('Training and Validation loss')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.legend()
# plt.show()

In [9]:
# model.load_weights("./rottenvsfresh_apple.h5")

In [10]:
# testing_generator = test_datagen.flow_from_directory(
#     '../input/apples7030/Apples/test',
#     target_size=(224, 224),
#     batch_size= 1,
#     class_mode='binary',
#     shuffle=False)
# STEP_SIZE_TEST=testing_generator.n
# print(STEP_SIZE_TEST)
# # valid_generator.reset()
# pred=model.predict_generator(testing_generator,
# steps=STEP_SIZE_TEST,
# verbose=1)

In [11]:
# orignal_labels=testing_generator.labels

In [12]:
# # #print(np.sum(orignal_labels))
# for i in range(len(pred)):
#     if pred[i]>0.5:
#         pred[i]=1
#     else:
#         pred[i]=0
# pred=pred[:,0]

In [13]:
# # predicted_class_indices=np.argmax(pred,axis=1)
# # print(predicted_class_indices)

# labels = (testing_generator.class_indices)

# labels = dict((v,k) for k,v in labels.items())

# predictions = [labels[k] for k in list(pred)]

# #print(predictions)
# filenames=testing_generator.filenames
# results=pd.DataFrame({"Filename":filenames,
#                       "Predictions":predictions})

# print(results)

In [14]:
# acc=np.sum(np.array(orignal_labels)==np.array(pred))
# print("Prediction Accuracy = ",acc/len(orignal_labels))