This notebook loads crop and houseplant images, converts them to normalized arrays, and trains a basic CNN model to classify each plant as healthy or not healthy.

In [None]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from sklearn.model_selection import train_test_split

# for reproducibility
np.random.seed(42)

## Image Processing

In [9]:
# create list for healthy crop images
healthycrop_arrays = []
# define filepath for healthy crop images
healthycrop_path = 'Data/crop_images/healthy/'

# convert each image to normalized array and store (at most 500 images)
# count tracks successfully converted images only, so a file that fails to
# load does not use up one of the 500 slots
count = 0
# sorted() makes the chosen subset deterministic; os.listdir order is arbitrary
for file in sorted(os.listdir(healthycrop_path)):
    # stop as soon as the cap is reached instead of walking the whole directory
    if count >= 500:
        break
    try:
        # target_size automatically resizes each img on import
        healthy = load_img(os.path.join(healthycrop_path, file), target_size=(256, 256))
        # dividing by 255 normalizes the pixel values
        healthy_arr = img_to_array(healthy) / 255
        healthycrop_arrays.append(healthy_arr)
        count += 1
    except Exception as err:
        # report and skip files that cannot be loaded; catching Exception
        # (not a bare except) keeps KeyboardInterrupt working
        print(f'Error for file: {file} ({err})')

print(f'{len(healthycrop_arrays)} pictures converted.')

500 pictures converted.


In [10]:
# create list for healthy houseplant images
healthyhouse_arrays = []
# define filepath for healthy houseplant images
healthyhouse_path = 'Data/houseplant_images/healthy/'

# convert each image to normalized array and store (at most 451 images)
# count tracks successfully converted images only, so a file that fails to
# load does not use up one of the 451 slots
count = 0
# sorted() makes the chosen subset deterministic; os.listdir order is arbitrary
for file in sorted(os.listdir(healthyhouse_path)):
    # stop as soon as the cap is reached instead of walking the whole directory
    if count >= 451:
        break
    try:
        # target_size automatically resizes each img on import
        healthy = load_img(os.path.join(healthyhouse_path, file), target_size=(256, 256))
        # dividing by 255 normalizes the pixel values
        healthy_arr = img_to_array(healthy) / 255
        healthyhouse_arrays.append(healthy_arr)
        count += 1
    except Exception as err:
        # report and skip files that cannot be loaded; catching Exception
        # (not a bare except) keeps KeyboardInterrupt working
        print(f'Error for file: {file} ({err})')

print(f'{len(healthyhouse_arrays)} pictures converted.')

451 pictures converted.


In [6]:
# create list for diseased crop images
diseasedcrop_arrays = []
# define filepath for diseased crop images
diseased_path = 'Data/crop_images/diseased/'

# convert each image to normalized array and store (at most 500 images)
# count tracks successfully converted images only, so a file that fails to
# load does not use up one of the 500 slots
count = 0
# sorted() makes the chosen subset deterministic; os.listdir order is arbitrary
for file in sorted(os.listdir(diseased_path)):
    # stop as soon as the cap is reached instead of walking the whole directory
    if count >= 500:
        break
    try:
        # target_size automatically resizes each img on import
        diseased = load_img(os.path.join(diseased_path, file), target_size=(256, 256))
        # dividing by 255 normalizes the pixel values
        diseased_arr = img_to_array(diseased) / 255
        diseasedcrop_arrays.append(diseased_arr)
        count += 1
    except Exception as err:
        # report and skip files that cannot be loaded; catching Exception
        # (not a bare except) keeps KeyboardInterrupt working
        print(f'Error for file: {file} ({err})')

print(f'{len(diseasedcrop_arrays)} pictures converted.')

500 pictures converted.


In [11]:
# create list for wilted houseplant images
wiltedhouse_arrays = []
# define filepath for wilted houseplant images
wilted_path = 'Data/houseplant_images/wilted/'

# convert each image to normalized array and store (at most 451 images)
# count tracks successfully converted images only, so a file that fails to
# load does not use up one of the 451 slots
count = 0
# sorted() makes the chosen subset deterministic; os.listdir order is arbitrary
for file in sorted(os.listdir(wilted_path)):
    # stop as soon as the cap is reached instead of walking the whole directory
    if count >= 451:
        break
    try:
        # target_size automatically resizes each img on import
        wilted = load_img(os.path.join(wilted_path, file), target_size=(256, 256))
        # dividing by 255 normalizes the pixel values
        wilted_arr = img_to_array(wilted) / 255
        wiltedhouse_arrays.append(wilted_arr)
        count += 1
    except Exception as err:
        # report and skip files that cannot be loaded; catching Exception
        # (not a bare except) keeps KeyboardInterrupt working
        print(f'Error for file: {file} ({err})')

print(f'{len(wiltedhouse_arrays)} pictures converted.')

451 pictures converted.


## CNN Modeling

In [12]:
# X should contain all converted images: healthy lists first, then not healthy
X = healthycrop_arrays + healthyhouse_arrays + diseasedcrop_arrays + wiltedhouse_arrays
# convert to array and check shape
X_arr = np.array(X)
print(f'X shape: {X_arr.shape}')

# 0 for healthy, 1 for not healthy
# the number of labels per class is taken from the image lists themselves so
# that y always lines up with X, whatever number of images was actually loaded
n_healthy = len(healthycrop_arrays) + len(healthyhouse_arrays)
n_not_healthy = len(diseasedcrop_arrays) + len(wiltedhouse_arrays)
y = [0] * n_healthy + [1] * n_not_healthy
# convert to array and check shape
y = np.array(y)
print(f'y shape: {y.shape}')

X shape: (1902, 256, 256, 3)
y shape: (1902,)


In [15]:
# train-test-split
# split the stacked array X_arr rather than the Python list X: the pieces come
# back as numpy arrays already, so no second list-to-array conversion (and no
# extra copy of the image data) is needed
# stratify=y keeps the healthy / not healthy ratio the same in both splits
X_train, X_test, y_train, y_test = train_test_split(X_arr, y, random_state=42, stratify=y)

In [16]:
# seed TensorFlow too: np.random.seed does not control Keras weight
# initialization or the shuffling performed by fit
tf.random.set_seed(42)

# build CNN model
model = Sequential()

# two convolution + max-pooling stages on 256x256 RGB input
model.add(Conv2D(64,(3,3), activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D(pool_size= (2,2)))

model.add(Conv2D(64,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size= (2,2), strides=(2,2)))

# flatten and make dense
model.add(Flatten())
model.add(Dense(64,activation='relu'))

# output layer: single sigmoid unit for the binary label (0 healthy, 1 not healthy)
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# keep the returned History so per-epoch loss/accuracy stay available in
# history.history instead of being discarded
# NOTE(review): validation_data is the test split, so choosing the best epoch
# from the validation accuracy gives an optimistic test score
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    batch_size=64,
                    epochs=5)

Train on 1426 samples, validate on 476 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7feb37f9e290>

In [17]:
# print the layer-by-layer summary of the model (output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 127, 127, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 125, 125, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 246016)            0         
_________________________________________________________________
dense (Dense)                (None, 64)                15745088  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        

The best score was in epoch 4 with a train accuracy of 78% and test accuracy of 73%.