In [113]:
import numpy as np
import pandas as pd
import cv2
import os
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from numpy import savez_compressed
from tqdm import tqdm_notebook as tqdm
from numpy import load
from sklearn.model_selection import train_test_split

In [114]:
images_path = "./images/"
files = os.listdir(images_path)

In [115]:
train_files = [i for i in files if "Train" in i]


In [116]:
print("Train: ", len(train_files))


Train:  1821


# Load Train CSV data

In [33]:
train_df = pd.read_csv("train.csv")

In [34]:
train_df.head(10)

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0
5,Train_5,1,0,0,0
6,Train_6,0,1,0,0
7,Train_7,0,0,0,1
8,Train_8,0,0,0,1
9,Train_9,1,0,0,0


In [35]:
print("Total: ", len(train_df))
print("Healthy: ",len(train_df[train_df["healthy"] == 1]))
print("Diseased: ",len(train_df[train_df["healthy"] == 0]))

Total:  1821
Healthy:  516
Diseased:  1305


In [36]:
y_dict = train_df.set_index('image_id')['healthy'].to_dict()

In [72]:
y_dict

{'Train_0': 0,
 'Train_1': 0,
 'Train_2': 1,
 'Train_3': 0,
 'Train_4': 1,
 'Train_5': 1,
 'Train_6': 0,
 'Train_7': 0,
 'Train_8': 0,
 'Train_9': 1,
 'Train_10': 0,
 'Train_11': 0,
 'Train_12': 0,
 'Train_13': 1,
 'Train_14': 0,
 'Train_15': 0,
 'Train_16': 0,
 'Train_17': 0,
 'Train_18': 1,
 'Train_19': 0,
 'Train_20': 0,
 'Train_21': 0,
 'Train_22': 0,
 'Train_23': 0,
 'Train_24': 1,
 'Train_25': 0,
 'Train_26': 0,
 'Train_27': 0,
 'Train_28': 0,
 'Train_29': 0,
 'Train_30': 0,
 'Train_31': 0,
 'Train_32': 0,
 'Train_33': 1,
 'Train_34': 0,
 'Train_35': 0,
 'Train_36': 0,
 'Train_37': 0,
 'Train_38': 0,
 'Train_39': 0,
 'Train_40': 0,
 'Train_41': 0,
 'Train_42': 0,
 'Train_43': 1,
 'Train_44': 0,
 'Train_45': 0,
 'Train_46': 1,
 'Train_47': 0,
 'Train_48': 0,
 'Train_49': 0,
 'Train_50': 0,
 'Train_51': 0,
 'Train_52': 0,
 'Train_53': 1,
 'Train_54': 1,
 'Train_55': 1,
 'Train_56': 0,
 'Train_57': 0,
 'Train_58': 0,
 'Train_59': 0,
 'Train_60': 0,
 'Train_61': 0,
 'Train_62': 0,
 '

In [37]:
size = (180,120)

# Load Train Image data

In [31]:
def save_tarin_images_as_list(train_files, images_path, y_dict, size):
    data_list = list()
    label_list = list()
    for file in tqdm(train_files):
        path = images_path + file
        pixels = load_img(path, target_size= size)
        pixels = img_to_array(pixels)
        data_list.append(pixels)
        label_list.append(y_dict[file.split(".")[0]])
    return [data_list,label_list]

In [32]:

data = save_tarin_images_as_list(train_files, images_path, y_dict, size)

HBox(children=(IntProgress(value=0, max=1821), HTML(value='')))




In [38]:
X = data[0]
y = data[1]

In [39]:
filename = 'PestData_Train.npz'
savez_compressed(filename, X, np.array([[i] for i in y]))
print('Saved dataset: ', filename)

Saved dataset:  PestData_Train.npz


# Build CNN model

In [40]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

In [41]:
model = Sequential()
model.add(Conv2D(16, (3, 3), input_shape=(size[0], size[1], 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(16, (3, 3)))
model.add(Activation('relu'))

# the model so far outputs 3D feature maps (height, width, features)

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

In [42]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 178, 118, 16)      448       
_________________________________________________________________
activation_21 (Activation)   (None, 178, 118, 16)      0         
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 89, 59, 16)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 87, 57, 32)        4640      
_________________________________________________________________
activation_22 (Activation)   (None, 87, 57, 32)        0         
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 43, 28, 32)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 41, 26, 32)        9248      
__________

In [43]:
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Setup for Training

In [44]:
data = load("PestData_Train.npz")
X, y = data['arr_0'],data['arr_1']

In [45]:
print(X.shape)
print(y.shape)

(1821, 180, 120, 3)
(1821, 1)


In [46]:
X = np.array(X)
y = np.array(y)
# Normalizing X values between 0 to 1.
X = X.astype('float32')
X /= np.max(X)

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=36)

In [48]:
from tensorflow.python.keras.callbacks import ModelCheckpoint
filepath="CNN_weights.h5"
checkpoint = ModelCheckpoint(filepath, monitor="val_acc", verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [49]:
model.fit(X_train, y_train, epochs=16, batch_size=16, callbacks=callbacks_list, validation_data=(X_test, y_test))

Train on 1274 samples, validate on 547 samples
Epoch 1/16

Epoch 00001: val_acc improved from -inf to 0.70932, saving model to CNN_weights.h5
Epoch 2/16

Epoch 00002: val_acc improved from 0.70932 to 0.72578, saving model to CNN_weights.h5
Epoch 3/16

Epoch 00003: val_acc did not improve from 0.72578
Epoch 4/16

Epoch 00004: val_acc improved from 0.72578 to 0.73492, saving model to CNN_weights.h5
Epoch 5/16

Epoch 00005: val_acc improved from 0.73492 to 0.73675, saving model to CNN_weights.h5
Epoch 6/16

Epoch 00006: val_acc did not improve from 0.73675
Epoch 7/16

Epoch 00007: val_acc did not improve from 0.73675
Epoch 8/16

Epoch 00008: val_acc did not improve from 0.73675
Epoch 9/16

Epoch 00009: val_acc did not improve from 0.73675
Epoch 10/16

Epoch 00010: val_acc improved from 0.73675 to 0.77697, saving model to CNN_weights.h5
Epoch 11/16

Epoch 00011: val_acc did not improve from 0.77697
Epoch 12/16

Epoch 00012: val_acc did not improve from 0.77697
Epoch 13/16

Epoch 00013: val

<keras.callbacks.History at 0x13487ebe0>

In [50]:
# serialize model to JSON
model_json = model.to_json()
with open("CNN_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("CNN_model.h5")
print("Saved model to disk")

Saved model to disk


# Testing CNN model

In [51]:
# load json and create model
from tensorflow.python.keras.models import model_from_json

json_file = open('CNN_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
# load weights into new model
model.load_weights("CNN_weights.h5")
print("Loaded model from disk")

Loaded model from disk


In [52]:
# evaluate loaded model on test data
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

print("Test data")
score = model.evaluate(X_test, y_test, verbose=1)
print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

Test data
acc: 79.34%


In [53]:
y_preds = model.predict(X_test, verbose=1)
y_preds.shape



(547, 1)

In [54]:
y_preds[:10]

array([[5.1106846e-01],
       [9.3845129e-03],
       [3.3378601e-06],
       [8.6456537e-05],
       [2.3318738e-01],
       [0.0000000e+00],
       [5.6624413e-07],
       [8.7640584e-03],
       [1.1214614e-04],
       [1.0565817e-03]], dtype=float32)

In [55]:
preds = model.predict_classes(X_test, verbose=1)
preds.shape



(547, 1)

In [56]:
model.predict_classes(X_test[10].reshape(-1,180,120,3))[0][0]

0

In [58]:
print("Actual healthy: ",len(y_test[y_test == 1]))
print("Predicted healthy: ",len(preds[preds == 1]))

Actual healthy:  159
Predicted healthy:  120


In [98]:
from sklearn.metrics import classification_report

print(classification_report(y_test, preds, target_names=['Non-healthy', 'Healthy']))

              precision    recall  f1-score   support

 Non-healthy       0.82      0.90      0.86       388
     Healthy       0.69      0.52      0.59       159

   micro avg       0.79      0.79      0.79       547
   macro avg       0.76      0.71      0.73       547
weighted avg       0.78      0.79      0.78       547



# Final Model in Use

In [99]:
from keras.preprocessing.image import img_to_array
import cv2 
from tensorflow.python.keras.models import model_from_json

def make_predict_CNN(image, size):
    X = cv2.resize(image, size) 
    X = img_to_array(X)
    X = np.array(X).reshape(-1, size[0], size[1], 3)
    X = X.astype('float32')
    X /= 255
    
    json_file = open('CNN_model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights("CNN_weights.h5")

    res = model.predict_classes(X)[0][0]
    
    return res


In [112]:
size = (180, 120)

image = cv2.imread("./images/Train_11.jpg", 1) 
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
res = make_predict_CNN(image, size)

if res == 0:
    print("Not healthy")
else:
    print("Healthy")

Healthy
