In [None]:
# Obtain plant image paths:
import random
random.seed(12)

import os
import numpy as np
from glob import glob

# Collect one [path, class_index] pair per image found under data/<class_dir>/.
DATA_DIR = "data"

# os.listdir() returns entries in arbitrary order, so sort them to keep the
# class-index assignment stable between runs and machines. Entries that are
# not directories are skipped so a stray file cannot consume a class index.
class_dirs = sorted(
    entry for entry in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, entry))
)

image_paths = []
for class_index, class_dir in enumerate(class_dirs):
    # os.path.join uses the host OS separator instead of a hard-coded "\\".
    # glob() order is arbitrary too; sorting keeps the later seeded shuffle
    # reproducible.
    for image_path in sorted(glob(os.path.join(DATA_DIR, class_dir, "*"))):
        image_paths.append([image_path, class_index]) # (path and index)

print('There are %d plant images.' % len(image_paths))

In [None]:
# Split data into training (70%), validation (20%), and testing (10%) sets:
from keras.utils import np_utils
NUM_CLASSES = 12           # width of the one-hot target vectors
TRAINING_FRACTION = 0.7
VALIDATION_FRACTION = 0.2  # the remainder (roughly 10%) becomes the testing set


def _paths_and_targets(entries):
    """Turn [path, class_index] pairs into (path array, one-hot target matrix)."""
    paths = np.array([entry[0] for entry in entries])
    labels = np.array([entry[1] for entry in entries])
    return paths, np_utils.to_categorical(labels, NUM_CLASSES)


# Shuffle a copy and slice it, rather than shuffling and deleting from
# `image_paths` itself: the full listing stays intact, so re-running this cell
# still splits the whole dataset instead of only the leftover test portion.
# (A re-run draws a new permutation unless the seed cell is re-run first.)
shuffled_paths = list(image_paths)
random.shuffle(shuffled_paths)

training_count = int(len(shuffled_paths) * TRAINING_FRACTION)
validation_count = int(len(shuffled_paths) * VALIDATION_FRACTION)
validation_end = training_count + validation_count

training_paths, training_targets = _paths_and_targets(shuffled_paths[:training_count])
validation_paths, validation_targets = _paths_and_targets(shuffled_paths[training_count:validation_end])
# Everything left after the two truncated counts above goes to the testing set.
testing_paths, testing_targets = _paths_and_targets(shuffled_paths[validation_end:])

print('There are %d training images, %d validation images, and %d testing images.' % (len(training_paths), len(validation_paths), len(testing_paths)))

In [None]:
# Load images, resize them to 224 x 224 pixels, and divide each channel pixel by 255:
from keras.preprocessing import image
from tqdm import tqdm
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

def path_to_tensor(image_path):
    """Load one image file and return it as a batch of one.

    The file is read with ``image.load_img`` using ``target_size=(224, 224)``,
    converted to an array with ``image.img_to_array``, and then given a new
    leading axis of length 1.
    """
    pil_image = image.load_img(image_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # Prepend a length-1 axis so the results for several images can be stacked.
    return pixels[np.newaxis, ...]

def paths_to_tensor(image_paths):
    """Load every path in ``image_paths`` and stack the results along axis 0.

    Each path is converted with ``path_to_tensor``; a tqdm progress bar tracks
    the iteration over the paths.
    """
    tensors = []
    for image_path in tqdm(image_paths):
        tensors.append(path_to_tensor(image_path))
    return np.vstack(tensors)

# Load each split in turn (training, validation, testing), cast to float32 and
# divide every channel value by 255.
training_tensors, validation_tensors, testing_tensors = (
    paths_to_tensor(split_paths).astype('float32') / 255
    for split_paths in (training_paths, validation_paths, testing_paths)
)

In [None]:
# Set up the Convolutional Neural Network:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.layers.advanced_activations import LeakyReLU, PReLU

def _build_model():
    """Assemble the CNN: six Conv2D + PReLU stages followed by a softmax layer.

    The first five stages end in 2x2 max pooling; the sixth ends in global
    average pooling, which feeds the 12-unit softmax output.
    """
    net = Sequential()
    stage_filters = (16, 32, 64, 128, 256, 512)
    last_stage = len(stage_filters) - 1

    for stage, n_filters in enumerate(stage_filters):
        if stage == 0:
            # Only the first layer declares the input shape.
            net.add(Conv2D(filters=n_filters, kernel_size=2, padding='same', input_shape=(224, 224, 3)))
        else:
            net.add(Conv2D(filters=n_filters, kernel_size=2, padding='same'))

        # NOTE(review): PReLU() is used without shared_axes; per the Keras docs
        # that means parameters are not shared across spatial positions.
        # Confirm this is intended (shared_axes=[1, 2] is the documented
        # alternative for 2D conv feature maps).
        net.add(PReLU())

        if stage < last_stage:
            net.add(MaxPooling2D(pool_size=2))
        else:
            net.add(GlobalAveragePooling2D())

    net.add(Dense(12, activation='softmax'))
    return net


model = _build_model()

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
# Train model:
from keras.callbacks import ModelCheckpoint

checkpoint_path = 'saved_models/weights.best.from_scratch.hdf5'

# Create the checkpoint directory up front: nothing else in this notebook
# creates it, and a missing folder would make the first checkpoint write fail
# (after the epoch has already been trained) on a fresh checkout.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True keeps only the best weights seen so far on the monitored
# quantity (the callback's default monitor is used here).
checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)

model.fit(training_tensors, training_targets,
          validation_data=(validation_tensors, validation_targets),
          epochs=1, batch_size=20, callbacks=[checkpointer], verbose=1)

print("Finished training model.")

In [None]:
# Load weights:
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

# Get index of predicted plant species for each image in testing set.
# A single batched predict() call over the whole testing tensor replaces one
# predict() call per image; argmax over axis 1 picks the most probable class
# for each row. list() keeps the result a list of integer indices, as before.
plant_species_predictions = list(np.argmax(model.predict(testing_tensors), axis=1))

# Report micro-averaged F1-score:
from sklearn.metrics import f1_score
test_accuracy = f1_score(np.argmax(testing_targets, axis=1), plant_species_predictions, average='micro')
print('Micro-averaged F1-score: %.4f' % test_accuracy)

In [None]:
from sklearn.metrics import confusion_matrix
# confusion_matrix takes (y_true, y_pred): rows are the true species and
# columns are the predicted species. The arguments were previously swapped,
# which printed the transposed matrix.
print(confusion_matrix(np.argmax(testing_targets, axis=1), plant_species_predictions))