In [None]:
# to handle datasets
import pandas as pd
import numpy as np

from glob import glob
import os

import matplotlib.pyplot as plt
%matplotlib inline
import cv2

# to display all the columns of the dataframe in the notebook
# (use the public top-level API; `pd.pandas` only exists as a side effect
# of how the package imports its own sub-modules)
pd.set_option('display.max_columns', None)

In [None]:
# data_preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# evaluate model and separate train and test
from sklearn.metrics import confusion_matrix

In [None]:
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# deprecation notices included — consider narrowing it to specific categories
warnings.filterwarnings('ignore')

In [None]:
# for the convolutional network
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.preprocessing import image
from keras.utils import to_categorical

# Load Images / Data

In [None]:
# here are all our images
# (relative path: it is resolved against the notebook's working directory)
DATA_FOLDER = 'v2-plant-seedling-dataset'

In [None]:
# each weed class is in a dedicated folder
os.listdir(DATA_FOLDER)

In [None]:
# Let's walk over the directory structure, so we understand
# how the images are stored.
# Only a few paths per class are printed, together with the class total:
# printing every single path would bury the rest of the notebook.

MAX_PATHS_SHOWN = 3

# sorted() because os.listdir / glob return entries in arbitrary order
for class_folder_name in sorted(os.listdir(DATA_FOLDER)):
    class_folder_path = os.path.join(DATA_FOLDER, class_folder_name)

    # skip stray files that sit next to the class folders
    if not os.path.isdir(class_folder_path):
        continue

    class_image_paths = sorted(glob(os.path.join(class_folder_path, '*.png')))
    print(f'{class_folder_name}: {len(class_image_paths)} images')
    for image_path in class_image_paths[:MAX_PATHS_SHOWN]:
        print(f'    {image_path}')

In [None]:
# Let's create a dataframe:
# the dataframe stores the path to the image in one column
# and the class of the weed (the target) in the next column

# collect plain (path, class) tuples and build the frame once at the end,
# instead of creating one tiny DataFrame per image and concatenating them
image_records = []

# navigate within each folder; sorted() makes the row order (and therefore
# the seeded train/test split further down) reproducible across machines
for class_folder_name in sorted(os.listdir(DATA_FOLDER)):
    class_folder_path = os.path.join(DATA_FOLDER, class_folder_name)

    # collect every image path
    for image_path in sorted(glob(os.path.join(class_folder_path, '*.png'))):
        image_records.append((image_path, class_folder_name))

images_df = pd.DataFrame(image_records, columns=['image', 'target'])

# fail early with a readable message instead of deep inside a later cell
if images_df.empty:
    raise FileNotFoundError(f'no .png images found under {DATA_FOLDER!r}')

images_df.head(10)

In [None]:
# how many images do we have per class?
# this should give similar results to what we observed
# when we inspected the length of the image list in the dictionary

images_df['target'].value_counts()

In [None]:
# let's isolate a path, for demo
# we want to load the image in this path later
# (.loc looks the row up by index label, here label 0)

images_df.loc[0, 'image']

In [None]:
# Let's visualise a few images
# if the images you see in your notebook are not the same, don't worry:
# which image sits at a given row depends on the order the files were listed in

def plot_single_image(df, image_number):
    """Display one image with its class as the title.

    Parameters
    ----------
    df : DataFrame with an 'image' column (file path) and a 'target'
        column (class name).
    image_number : index label of the row to display (looked up with .loc).

    Raises
    ------
    FileNotFoundError
        If the file at the stored path cannot be read as an image.
    """
    image_path = df.loc[image_number, 'image']
    im = cv2.imread(image_path)

    # cv2.imread signals failure by returning None rather than raising
    if im is None:
        raise FileNotFoundError(f'could not read image: {image_path}')

    # OpenCV decodes into BGR channel order while matplotlib expects RGB;
    # without the conversion red and blue are swapped on screen
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    plt.title(df.loc[image_number, 'target'])

plot_single_image(images_df, 0)

In [None]:
# same helper, row label 3000 (the label must exist in images_df's index)
plot_single_image(images_df, 3000)

In [None]:
# same helper, row label 1000 (the label must exist in images_df's index)
plot_single_image(images_df, 1000)

In [None]:
# Let's go ahead and plot a bunch of images together,
# so we get a better feeling of what our images look like

def plot_for_class(df, label):
    """Plot up to nine images of one class in a 3 x 3 grid.

    Parameters
    ----------
    df : DataFrame with an 'image' column (file path) and a 'target'
        column (class name).
    label : class name to select; rows whose 'target' equals it are used,
        in the order they appear in `df`.

    Panels for which the class has no image left are switched off instead
    of raising, so classes with fewer than nine images still plot.

    Raises
    ------
    FileNotFoundError
        If one of the selected files cannot be read as an image.
    """
    nb_rows = 3
    nb_cols = 3

    # select the class once, outside the loop, and work on a fresh Series
    # rather than resetting the index of a slice in place
    class_paths = df.loc[df['target'] == label, 'image'].reset_index(drop=True)

    fig, axs = plt.subplots(nb_rows, nb_cols, figsize=(10, 10))
    fig.suptitle(label)

    # axs.flat walks the grid row by row, the same order as a nested i/j loop
    for n, ax in enumerate(axs.flat):
        if n >= len(class_paths):
            ax.axis('off')
            continue

        image_path = class_paths.iloc[n]
        im = cv2.imread(image_path)
        if im is None:
            raise FileNotFoundError(f'could not read image: {image_path}')

        # OpenCV returns BGR, matplotlib expects RGB
        ax.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

In [None]:
# grid of sample images whose target is 'Cleavers'
plot_for_class(images_df, 'Cleavers')

In [None]:
# grid of sample images whose target is 'Maize'
plot_for_class(images_df, 'Maize')

In [None]:
# grid of sample images whose target is 'Common Chickweed'
plot_for_class(images_df, 'Common Chickweed')

# Separate train and test

In [None]:
# train_test_split
# stratify on the target so that every class keeps the same share of
# images in the train and in the test partition; a plain random split
# only achieves that approximately

X_train, X_test, y_train, y_test = train_test_split(
    images_df['image'],
    images_df['target'],
    test_size=.20,
    random_state=101,
    stratify=images_df['target'],
)

print(X_train.shape)
print(X_test.shape)

In [None]:
# the indices of the training data are mixed
# this will cause problems later

X_train.head()

In [None]:
# renumber the rows 0..n-1: later cells address the images by row number

X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
X_train.head()

In [None]:
# the targets get the same 0..n-1 numbering so they stay aligned with the images

y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

In [None]:
# fraction of images within each class (counts divided by the number of training rows)

y_train.value_counts() / len(y_train)

In [None]:
# should be the same in the test set
# (fraction per class: counts divided by the number of test rows)

y_test.value_counts() / len(y_test)

In [None]:
# Let's prepare the target
# it is a multiclass classification, so we need to
# one hot encode the target

encoder = LabelEncoder()
encoder.fit(y_train)

# fix the one-hot width explicitly: left to itself to_categorical sizes the
# matrix from the largest label it is given, so a test set that happens to
# miss the last class would come out one column narrower than the train set
num_classes = len(encoder.classes_)

train_y = to_categorical(encoder.transform(y_train), num_classes=num_classes)
test_y = to_categorical(encoder.transform(y_test), num_classes=num_classes)

train_y

In [None]:
# The images in our folders are all different sizes
# For neural networks, however, we need images of the same size
# The images will all be resized to this size (in pixels, for both sides):

IMAGE_SIZE = 150

In [None]:
def im_resize(df, n):
    """Read the n-th image of `df` and resize it to IMAGE_SIZE x IMAGE_SIZE.

    Parameters
    ----------
    df : pandas Series (or any indexable sequence) of image file paths.
    n : position of the image to load.

    Returns
    -------
    The resized image array as returned by OpenCV. Channel order is left
    as OpenCV delivers it (BGR).

    Raises
    ------
    FileNotFoundError
        If the file cannot be read as an image.
    """
    # positional lookup for pandas objects, so the result does not depend
    # on the index having been reset; plain sequences fall back to df[n]
    image_path = df.iloc[n] if hasattr(df, 'iloc') else df[n]

    im = cv2.imread(image_path)
    # cv2.imread returns None on failure; without this check cv2.resize
    # fails with an assertion that does not mention the offending file
    if im is None:
        raise FileNotFoundError(f'could not read image: {image_path}')

    return cv2.resize(im, (IMAGE_SIZE, IMAGE_SIZE))

In [None]:
# sanity check: resize entry 7 of the training paths and look at the array shape
tmp = im_resize(X_train, 7)
tmp.shape

In [None]:
# the shape of the dataset needs to be (n1, n2, n3, n4)
# where n1 is the number of observations
# n2 and n3 are the image height and width
# and n4 indicates that it is a color image, so 3 planes per image

def create_dataset(df, image_size):
    """Load every image listed in `df` into one array, as required for the cnn.

    Parameters
    ----------
    df : pandas Series (or any indexable sequence) of image file paths.
    image_size : side length, in pixels, every image is resized to.

    Returns
    -------
    float32 array of shape (len(df), image_size, image_size, 3). Pixel
    values are stored as read (no rescaling) and in OpenCV's BGR order.

    Raises
    ------
    FileNotFoundError
        If one of the files cannot be read as an image.
    """
    dataset = np.zeros((len(df), image_size, image_size, 3), dtype='float32')

    for n in range(len(df)):
        # positional lookup for pandas objects; plain sequences use df[n]
        image_path = df.iloc[n] if hasattr(df, 'iloc') else df[n]

        im = cv2.imread(image_path)
        if im is None:
            raise FileNotFoundError(f'could not read image: {image_path}')

        # resize with the size passed in, so the image always matches the
        # array allocated above, whatever the value of `image_size`
        dataset[n] = cv2.resize(im, (image_size, image_size))

    print(f'Dataset Images shape: {dataset.shape} size {dataset.size:,}')
    return dataset

In [None]:
# load and resize every training image into one float32 array
x_train = create_dataset(X_train, IMAGE_SIZE)

In [None]:
# same for the test images
x_test = create_dataset(X_test, IMAGE_SIZE)

In [None]:
# number of different classes
# (the softmax output layer of the network needs one unit per class)

len(y_train.unique())

In [None]:
# this is our cnn:
# two stacks of (conv, conv, max-pool, dropout) followed by a dense head

kernel_size = (3, 3)
pool_size = (2, 2)
first_filters = 32
second_filters = 64
# not used by the layers below; kept defined in case other cells read it
third_filters = 128

dropout_conv = .3
dropout_dense = .3

# one output unit per class known to the label encoder, instead of a
# hard-coded 12 that silently breaks if the set of class folders changes
num_classes = len(encoder.classes_)

model = Sequential()

# first stack
model.add(Conv2D(first_filters, kernel_size, activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
model.add(Conv2D(first_filters, kernel_size, activation='relu'))
model.add(MaxPool2D(pool_size=pool_size))
model.add(Dropout(dropout_conv))

# second stack: uses second_filters, which was defined for this purpose
# but never referenced (the stack repeated first_filters)
model.add(Conv2D(second_filters, kernel_size, activation='relu'))
model.add(Conv2D(second_filters, kernel_size, activation='relu'))
model.add(MaxPool2D(pool_size=pool_size))
model.add(Dropout(dropout_conv))

# classification head
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(dropout_dense))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

In [None]:
# the targets are one-hot vectors over mutually exclusive classes and the
# output layer is a softmax, so the matching loss is categorical
# cross-entropy; binary cross-entropy treats every output unit as an
# independent yes/no problem
model.compile(optimizer=Adam(learning_rate=.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# passed to model.fit below
batch_size = 10  # samples per gradient update
epochs = 8       # passes over the training data

In [None]:
filepath = 'model.keras'

# both callbacks watch the same quantity. The model is compiled with
# metrics=['accuracy'], so the logged key is 'accuracy'; a callback
# monitoring 'acc' never finds its metric and does nothing.
# NOTE(review): 'val_accuracy' (computed on the validation split) would
# be the less optimistic choice — confirm which one is intended
monitored_metric = 'accuracy'

# keep only the best model seen so far on disk
checkpoint = ModelCheckpoint(filepath, monitor=monitored_metric, verbose=1,
                             save_best_only=True, mode='max')

# halve the learning rate when the metric stops improving for one epoch
reduce_lr = ReduceLROnPlateau(monitor=monitored_metric, factor=.5, patience=1,
                              verbose=1, mode='max', min_lr=.00001)

callbacks_list = [checkpoint, reduce_lr]

# validation_split is a fraction between 0 and 1: hold out 10% of the
# training data for validation (the value 10 is not a valid fraction)
history = model.fit(x=x_train, y=train_y,
                    batch_size=batch_size,
                    validation_split=.1,
                    epochs=epochs,
                    verbose=2,
                    callbacks=callbacks_list)