This notebook was used to process crop, houseplant, and Reddit images for CNN models

In [1]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from sklearn.model_selection import train_test_split

# for reproducibility
np.random.seed(42)

## Image Processing

In [2]:
# create list for healthy crop images
healthycrop_arrays = []
# define filepath for healthy crop images
healthycrop_path = 'Data/crop_images/healthy/'

# convert each image to normalized array and store
count = 0
for file in os.listdir(healthycrop_path):
    if count < 500:
        try:
            # target_size automatically resizes each img on import
            healthy = load_img(healthycrop_path + file, target_size=(256, 256))
            healthy_arr = img_to_array(healthy) / 255
            healthycrop_arrays.append(healthy_arr)
        except:
            print(f'Error for file: {file}')
        count +=1

print(f'{len(healthycrop_arrays)} pictures converted.')  

500 pictures converted.


In [3]:
# create list for healthy houseplant images
healthyhouse_arrays = []
# define filepath for healthy houseplant images
healthyhouse_path = 'Data/houseplant_images/healthy/'

# convert each image to normalized array and store
count = 0
for file in os.listdir(healthyhouse_path):
    if count < 451:
        try:
            # target_size automatically resizes each img on import
            healthy = load_img(healthyhouse_path + file, target_size=(256, 256))
            healthy_arr = img_to_array(healthy) / 255
            healthyhouse_arrays.append(healthy_arr)
        except:
            print(f'Error for file: {file}')
        count +=1

print(f'{len(healthyhouse_arrays)} pictures converted.')  

451 pictures converted.


In [4]:
# create list for diseased crop images
diseasedcrop_arrays = []
# define filepath for diseased crop images
diseased_path = 'Data/crop_images/diseased/'

# convert each image to normalized array and store
count = 0
for file in os.listdir(diseased_path):
    if count < 500:
        try:
            # target_size automatically resizes each img on import
            diseased = load_img(diseased_path + file, target_size=(256, 256))
            diseased_arr = img_to_array(diseased) / 255
            diseasedcrop_arrays.append(diseased_arr)
        except:
            print(f'Error for file: {file}')
        count +=1

print(f'{len(diseasedcrop_arrays)} pictures converted.')  

500 pictures converted.


In [5]:
# create list for wilted houseplant images
wiltedhouse_arrays = []
# define filepath for wilted houseplant images
wilted_path = 'Data/houseplant_images/wilted/'

# convert each image to normalized array and store
count = 0
for file in os.listdir(wilted_path):
    if count < 451:
        try:
            # target_size automatically resizes each img on import
            diseased = load_img(wilted_path + file, target_size=(256, 256))
            diseased_arr = img_to_array(diseased) / 255
            wiltedhouse_arrays.append(diseased_arr)
        except:
            print(f'Error for file: {file}')
        count +=1

print(f'{len(wiltedhouse_arrays)} pictures converted.')  

451 pictures converted.


In [6]:
# create list for diseased reddit images
diseasedreddit_arrays = []
# define filepath for diseased reddit images
diseasedreddit_path = 'Data/reddit/diseased/'

# convert each image to normalized array and store
count = 0
for file in os.listdir(diseasedreddit_path):
    if count < 141:
        try:
            # target_size automatically resizes each img on import
            diseased = load_img(diseasedreddit_path + file, target_size=(256, 256))
            diseased_arr = img_to_array(diseased) / 255
            diseasedreddit_arrays.append(diseased_arr)
        except:
            print(f'Error for file: {file}')
        count +=1

print(f'{len(diseasedreddit_arrays)} pictures converted.')  

Error for file: .DS_Store
140 pictures converted.


In [7]:
# create list for wilted reddit images
wiltedreddit_arrays = []
# define filepath for wilted reddit images
wiltedreddit_path = 'Data/reddit/wilted/'

# convert each image to normalized array and store
count = 0
for file in os.listdir(wiltedreddit_path):
    if count < 174:
        try:
            # target_size automatically resizes each img on import
            diseased = load_img(wiltedreddit_path + file, target_size=(256, 256))
            diseased_arr = img_to_array(diseased) / 255
            wiltedreddit_arrays.append(diseased_arr)
        except:
            print(f'Error for file: {file}')
        count +=1

print(f'{len(wiltedreddit_arrays)} pictures converted.')  

174 pictures converted.


## CNN Modeling

In [8]:
# X should contain all converted images
X = healthycrop_arrays + healthyhouse_arrays + diseasedcrop_arrays + wiltedhouse_arrays + diseasedreddit_arrays + wiltedreddit_arrays
# convert to array and check shape
X_arr = np.array(X)
print(f'X shape: {X_arr.shape}')

# 0 for healthy, 1 for not healthy
y = [0] * 951 + [1] * 951 + [1] * 314
# convert to array and check shape
y = np.array(y)
print(f'y shape: {y.shape}')

X shape: (2216, 256, 256, 3)
y shape: (2216,)


In [9]:
# train-test-split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)
X_train = np.array(X_train)
X_test = np.array(X_test)

In [10]:
# build CNN model 1
model = Sequential()

model.add(Conv2D(16,(3,3), activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D(pool_size= (2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

# flatten and make dense
model.add(Flatten())
model.add(Dense(16,activation='relu'))

# output layer
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=64,
          epochs=10)

Train on 1662 samples, validate on 554 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe8affbebd0>

The best score was in epoch 10 with a train accuracy of 79% and test accuracy of 79%.

In [11]:
# build CNN model 2
model = Sequential()

model.add(Conv2D(16,(3,3), activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D(pool_size= (2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

# flatten and make dense
model.add(Flatten())
model.add(Dense(64,activation='relu'))

# output layer
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=64,
          epochs=10)

Train on 1662 samples, validate on 554 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe908b60310>

The best score was in epoch 10 with a train accuracy of 82% and test accuracy of 80%.

In [16]:
# build CNN model 3
model = Sequential()

model.add(Conv2D(16,(3,3), activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D(pool_size= (2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(16,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

# flatten and make dense
model.add(Flatten())
model.add(Dense(128,activation='relu'))

# output layer
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=64,
          epochs=10)

Train on 1662 samples, validate on 554 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe919c14550>

The best score was in epoch 8 with a train accuracy of 85% and test accuracy of 80%.

In [17]:
# build CNN model 4
model = Sequential()

model.add(Conv2D(32,(3,3), activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D(pool_size= (2,2)))

model.add(Conv2D(32,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

model.add(Conv2D(32,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

# flatten and make dense
model.add(Flatten())
model.add(Dense(32,activation='relu'))

# output layer
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train,
          validation_data=(X_test, y_test),
          batch_size=64,
          epochs=10)

Train on 1662 samples, validate on 554 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fef1c55b050>

The best score was in epoch 6 with a train accuracy of 84% and test accuracy of 80%.