In [1]:
%pip install icecream

Collecting icecream
  Using cached icecream-2.1.2-py2.py3-none-any.whl (8.3 kB)
Installing collected packages: icecream
Successfully installed icecream-2.1.2



In [4]:
# library imports
from datetime import datetime
import pandas as pd
import numpy as np
import pathlib
import os
import sys
import tensorflow as tf
import cv2
from icecream import ic
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


ModuleNotFoundError: No module named 'tensorflow'

In [6]:
# add drive base file paths
# dataset is from https://www.kaggle.com/rashikrahmanpritom/plant-disease-recognition-dataset
base_folder_path = 'dataset/'
model_export_folder_path = 'models/'
tr_dir = base_folder_path + 'Train/Train/'
val_dir = base_folder_path + 'Validation/Validation/'
te_dir = base_folder_path + 'Test/Test/'

In [11]:
EPOCHS = 10
IMG_WIDTH = int(168) # adjusted image width, original is 4000
IMG_HEIGHT = int(168) # adjusted image height, original is 2672
NUM_CATEGORIES = 3
TEST_SIZE = 0.4
categories_dict = {
    "Healthy": 0,
    "Powdery": 1,
    "Rust": 2
}
# for printing out predictions
inv_categories_dict = {v: k for k, v in categories_dict.items()} 

def load_data(data_dir, categories_dict):
    """
    Load image data from directory `data_dir`.
    Assume `data_dir` has one directory named after each category, numbered
    0 through NUM_CATEGORIES - 1. Inside each category directory will be some
    number of image files.
    Return tuple `(images, labels)`. `images` should be a list of all
    of the images in the data directory, where each image is formatted as a
    numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
    be a list of integer labels, representing the categories for each of the
    corresponding `images`.
    """
    images = list()
    labels = list()

    # scan through directory and import all image files (resized before import)
    for itemA in os.scandir(data_dir):
        if itemA.is_dir():
            for itemB in os.scandir(itemA.path):
                if itemB.is_file():
                    # add image to list
                    img = cv2.imread(itemB.path)
                    resized_img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
                    images.append(resized_img)
                    labels.append(categories_dict[itemA.name])
    return (np.array(images), np.array(labels))

def get_model():
    """
    Returns a compiled convolutional neural network model. Assume that the
    `input_shape` of the first layer is `(IMG_WIDTH, IMG_HEIGHT, 3)`.
    The output layer should have `NUM_CATEGORIES` units, one for each category.
    """
    # model building references: https://pyimagesearch.com/2018/12/31/keras-conv2d-and-convolutional-layers/
    # model building references: https://towardsdatascience.com/deep-learning-3-more-on-cnns-handling-overfitting-2bd5d99abe5d


    model = tf.keras.models.Sequential([
        # convolutional layer, learn 16 filters using 7x7 kernal
        # max-pooling layer, using 2x2 pool size
        tf.keras.layers.Conv2D(32, (7, 7), activation="relu", 
                               input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.1),

        # 2nd convolutional layer, learn 32 filters using 3x3 kernal
        # 2nd max-pooling layer, using 2x2 pool size
        tf.keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.2),

        # 3rd convolutional layer, learn 64 filters using 3x3 kernal
        # 3rd max-pooling layer, using 2x2 pool size
        tf.keras.layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),

        # 4th convolutional layer, learn 128 filters using 3x3 kernal
        # 4th max-pooling layer, using 2x2 pool size
        tf.keras.layers.Conv2D(256, (3, 3), activation="relu", padding="same"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(256, (3, 3), activation="relu", padding="same"),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.4),

        # flatten
        tf.keras.layers.Flatten(),

        # 512 unit hidden layer with 50% dropout
        tf.keras.layers.Dense(512, activation="relu"),
        tf.keras.layers.Dropout(0.5),

        # add output layer with output units for all of the signs
        tf.keras.layers.Dense(NUM_CATEGORIES, activation="softmax")
    ])

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

In [7]:
# file import
# load datasets


# editited code for spliting, 
# did not like the original dataset's proportion where only 10% testing and 4% validation
# combine all available data together to form 1 dataset, 
# and we will re-split data into training and testing sets using our defined proportions
images, labels = load_data(tr_dir, categories_dict)
images2, labels2 = load_data(val_dir, categories_dict)
images3, labels3 = load_data(te_dir, categories_dict)
images = np.append(images, np.append(images2, images3, axis = 0), axis = 0)
labels = np.append(labels, np.append(labels2, labels3, axis = 0), axis = 0)

labels = tf.keras.utils.to_categorical(labels)
x_tr, x_te, y_tr, y_te = train_test_split(
    np.array(images), np.array(labels), test_size=TEST_SIZE
)

# Additionally, evenlly split the training set again to training set and validation set
x_val, x_te, y_val, y_te = train_test_split(
    np.array(x_te), np.array(y_te), test_size=0.5
)

'''
# original data loading code
x_tr, y_tr = load_data(tr_dir, categories_dict)
y_tr = tf.keras.utils.to_categorical(y_tr, num_classes = 3, dtype = 'int')
ic(x_tr.shape, y_tr.shape)

x_val, y_val = load_data(val_dir, categories_dict)
y_val = tf.keras.utils.to_categorical(y_val, num_classes = 3, dtype = 'int')
ic(x_val.shape, y_val.shape)

x_te, y_te = load_data(te_dir, categories_dict)
y_te = tf.keras.utils.to_categorical(y_te, num_classes = 3, dtype = 'int')
ic(x_te.shape, y_te.shape)
'''

"\n# original data loading code\nx_tr, y_tr = load_data(tr_dir, categories_dict)\ny_tr = tf.keras.utils.to_categorical(y_tr, num_classes = 3, dtype = 'int')\nic(x_tr.shape, y_tr.shape)\n\nx_val, y_val = load_data(val_dir, categories_dict)\ny_val = tf.keras.utils.to_categorical(y_val, num_classes = 3, dtype = 'int')\nic(x_val.shape, y_val.shape)\n\nx_te, y_te = load_data(te_dir, categories_dict)\ny_te = tf.keras.utils.to_categorical(y_te, num_classes = 3, dtype = 'int')\nic(x_te.shape, y_te.shape)\n"

In [10]:
# Get a compiled neural network
model = get_model()

# Fit model on training data
model.fit(x_tr, y_tr, epochs=EPOCHS)

# Validate neural network performance
model.evaluate(x_val,  y_val, verbose=2)

# Evaluate neural network performance
model.evaluate(x_te,  y_te, verbose=2)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30

KeyboardInterrupt: 

In [None]:
# Save model to file
model_name = 'plant_disease_test_model'
model.save(model_export_folder_path + model_name)
print(f"Model saved to {model_export_folder_path + model_name}.")

INFO:tensorflow:Assets written to: /content/drive/My Drive/Colab Notebooks/models/plant_disease_test_model/assets
Model saved to /content/drive/My Drive/Colab Notebooks/models/plant_disease_test_model.
