In [17]:
#Libraries
import pandas as pd
import numpy as np
import tensorflow as tf    #with gpu version
import matplotlib.pyplot as plt
import seaborn as sns 
import splitfolders

from tensorflow import keras    #with gpu version
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Rescaling
from keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from keras.applications.mobilenet_v2 import MobileNetV2  
from keras.applications.vgg19 import VGG19, preprocess_input 
from keras.callbacks import BackupAndRestore, ReduceLROnPlateau, ModelCheckpoint

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.utils import class_weight 
from itertools import chain
from collections import Counter
from numba import cuda 

In [18]:
#Check if gpu is involved
#https://www.tensorflow.org/guide/gpu
from tensorflow.python.client import device_lib

#Report what TensorFlow can see: GPU count, all local devices, and the default GPU name
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))
print(device_lib.list_local_devices())
print(f'Default GPU Device: {tf.test.gpu_device_name()}')

print("If this outputs the matrix, then the GPU is working")
#https://stackoverflow.com/questions/58289983/low-nvidia-gpu-usage-with-keras-and-tensorflow?rq=1
#Multiply a 2x3 matrix by a 3x2 matrix inside a session pinned to the first GPU
matrix_values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
with tf.device('/gpu:0'), tf.compat.v1.Session() as sess:
    a = tf.constant(matrix_values, shape=[2, 3], name='a')
    b = tf.constant(matrix_values, shape=[3, 2], name='b')
    c = tf.matmul(a, b)
    print (sess.run(c))

Num GPUs Available:  1
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3100581976782060165
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3665166336
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7255887679826321343
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6"
]
Default GPU Device: /device:GPU:0
If this outputs the matrix, then the GPU is working
[[22. 28.]
 [49. 64.]]


In [None]:
#https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
#Visual check of the augmentation settings (for testing purposes)

#Augmentation ranges to preview
augmentation_options = dict(
    rotation_range=180,
    width_shift_range=0.3,
    height_shift_range=0.3,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.1,
    brightness_range = (0.6,1.3),
    horizontal_flip=True,
    fill_mode='reflect',
    validation_split=0.22222,
)
datagen = ImageDataGenerator(**augmentation_options)

#One image per batch so each draw can be displayed on its own
example_flow = datagen.flow_from_directory(
    directory = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Data\Plant Village\All\test",
    batch_size = 1)

#https://stackoverflow.com/questions/63355107/keras-imagedatagenerator-result-display-flow
#Draw ten augmented samples and show each of them
for _ in range(10):
    img, label = next(example_flow)
    print(img.shape)   #  (1,256,256,3)
    plt.imshow(img[0])
    plt.show()

In [None]:
#Stick EDA here

#Class counts
#https://stackoverflow.com/questions/65632501/keras-flow-from-directory-how-to-get-number-of-samples-in-each-category

apple_train_dir = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Data\Plant Village\Apples\train"
imflow = ImageDataGenerator(rescale = 1./255).flow_from_directory(apple_train_dir)

#Tally how often each class index occurs among the images the generator found
counter = Counter(imflow.classes)

print(counter.items())

## Sub-Network Creation

This section creates the sub-networks that are later combined into the full models.  Each final model has three plant-specific sub-networks (apple, peach, and cherry leaf).  There are four final models: Type-1 and Type-2, each in an 'other'-inclusive and an 'other'-exclusive variant.

Type-1 models have a single layer of sub-networks, in which task identification and disease classification are performed by the same sub-network.

Type-2 models will have two layers of sub-networks.  The first layer will perform task-identification, the second disease classification.

'Other'-inclusive or 'other'-exclusive refers to how the task-identifying 'other' class is trained (this class is included in each Type-1 sub-network and in each first-layer Type-2 sub-network).  Inclusive means that plant types that are the target of one sub-network may be used to train the 'other' (off-target) class in the other sub-networks within the model.  Exclusive means that examples of plant types that are the target of any sub-network in the model are not used for off-target identification in any of the sub-networks.

### Type 1, Other Inclusive

In [None]:
def AppleDiseaseOther_create():
    """Build, train, and save the Type-1 'other'-inclusive apple sub-network.

    A frozen MobileNetV2 (without its top layer) feeds a small dense head that
    ends in a 5-way softmax.  Images are read from the AppleDiseaseOther
    ``train`` directory, resized to 224x224 and augmented; a 0.22222 fraction
    is held out as the validation subset.  Classes are weighted with
    scikit-learn's 'balanced' scheme.

    Side effects:
        * ModelCheckpoint writes the best model (by ``val_accuracy``) to
          ``checkpoint_path``.
        * The final weights and the full model are saved as .h5 files in the
          current working directory.

    Returns:
        The fitted Sequential model, as it stands after the last epoch.
    """

    ########################################################################################
    #First, define the model structure

    #Loads Mobilenet layers, without the top layer, and freezes them so their weights are kept.
    #Freezing through the object avoids depending on the auto-generated layer name.
    base_network = MobileNetV2(include_top = False, input_shape = (224,224,3))
    base_network.trainable = False

    model = Sequential()
    model.add(base_network)
    model.add(Flatten())   #Flatten the mobilenet output, then add a couple dense layers which we train on the apple data
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(5))    #Must equal the number of class folders under the train directory
    model.add(Activation("softmax"))  #Softmax for multiclass classification

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',   # I hear this one works well
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #batches of 32 images (resized to 224x224 by target_size below) seems to work well with my gpu. About twice as fast as running on cpu
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (we seperate testing prior to anything)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleDiseaseOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    # NOTE(review): this reuses the augmenting datagen, so validation images are
    # augmented as well -- confirm that this is intended.
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleDiseaseOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Get's class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator

    #classes/y are keyword-only in current scikit-learn; passing them positionally raises a TypeError
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight = 'balanced',
        classes = class_labels,
        y = train_generator.classes)

    #Key each weight by its actual class index instead of assuming labels are exactly 0..k-1
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################33
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOther" #path for checkpoints

    callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1), #Reduces the learning rate if no improvements are made.  Hopefully make for a more accurate model
                 ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch") #Saves the model when it performes best
                ]

    #Step counts are cast to int; np.ceil alone hands Keras a numpy float
    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks

        )

    #Saves it
    model.save_weights('AppleDiseaseOther_1_Weights.h5')
    model.save('AppleDiseaseOther_1_Model.h5')

    return model

AppleDiseaseOtherMod1 = AppleDiseaseOther_create()

In [None]:
#Evaluate on the test set
# NOTE(review): this loads "...\AppleDiseaseOther1", while AppleDiseaseOther_create
# checkpoints to "...\AppleDiseaseOther" -- confirm which directory holds the
# model that should be evaluated.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOther1')

#Test images are only rescaled, not augmented
testdata = ImageDataGenerator(rescale=1./255)

#shuffle is off so the prediction order lines up with testgen.classes below
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\AppleDiseaseOther\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#Call evaluate on the loaded model itself rather than through the unbound Model method
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are already imported in the imports cell
Y_pred = testmodel.predict(testgen)
y_pred = np.argmax(Y_pred, axis=1)   #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def PeachDiseaseOther_create():
    """Build, train, and save the Type-1 'other'-inclusive peach sub-network.

    A frozen MobileNetV2 (without its top layer) feeds a small dense head that
    ends in a 3-way softmax.  Images are read from the PeachDiseaseOther
    ``train`` directory, resized to 224x224 and augmented; a 0.22222 fraction
    is held out as the validation subset.  Classes are weighted with
    scikit-learn's 'balanced' scheme.

    Side effects:
        * ModelCheckpoint writes the best model (by ``val_accuracy``) to
          ``checkpoint_path``.
        * The final weights and the full model are saved as .h5 files in the
          current working directory.

    Returns:
        The fitted Sequential model, as it stands after the last epoch.
    """

    ########################################################################################
    #First, define the model structure

    #Loads Mobilenet layers, without the top layer, and freezes them so their weights are kept.
    #Freezing through the object avoids depending on the auto-generated layer name.
    base_network = MobileNetV2(include_top = False, input_shape = (224,224,3))
    base_network.trainable = False

    model = Sequential()
    model.add(base_network)
    model.add(Flatten())   #Flatten the mobilenet output, then add a couple dense layers which we train on the peach data
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(3))    #Must equal the number of class folders under the train directory
    model.add(Activation("softmax"))  #Softmax for multiclass classification

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',   # I hear this one works well
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #batches of 32 images (resized to 224x224 by target_size below) seems to work well with my gpu. About twice as fast as running on cpu
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (we seperate testing prior to anything)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachDiseaseOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    # NOTE(review): this reuses the augmenting datagen, so validation images are
    # augmented as well -- confirm that this is intended.
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachDiseaseOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Get's class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator

    #classes/y are keyword-only in current scikit-learn; passing them positionally raises a TypeError
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight = 'balanced',
        classes = class_labels,
        y = train_generator.classes)

    #Key each weight by its actual class index instead of assuming labels are exactly 0..k-1
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################33
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOther" #path for checkpoints

    callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1), #Reduces the learning rate if no improvements are made.  Hopefully make for a more accurate model
                 ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch") #Saves the model when it performes best
                ]

    #Step counts are cast to int; np.ceil alone hands Keras a numpy float
    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks

        )

    #Saves it
    model.save_weights('PeachDiseaseOther_1_Weights.h5')
    model.save('PeachDiseaseOther_1_Model.h5')

    return model


PeachDiseaseOtherMod = PeachDiseaseOther_create()


In [None]:
#Evaluate on the test set
# NOTE(review): this loads "...\PeachDiseaseOther1", while PeachDiseaseOther_create
# checkpoints to "...\PeachDiseaseOther" -- confirm which directory holds the
# model that should be evaluated.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOther1')

#Test images are only rescaled, not augmented
testdata = ImageDataGenerator(rescale=1./255)

#shuffle is off so the prediction order lines up with testgen.classes below
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\PeachDiseaseOther\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#Call evaluate on the loaded model itself rather than through the unbound Model method
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are already imported in the imports cell
Y_pred = testmodel.predict(testgen)
y_pred = np.argmax(Y_pred, axis=1)   #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def CherryDiseaseOther_create():
    """Build, train, and save the Type-1 'other'-inclusive cherry sub-network.

    A frozen MobileNetV2 (without its top layer) feeds a small dense head that
    ends in a 3-way softmax.  Images are read from the CherryDiseaseOther
    ``train`` directory, resized to 224x224 and augmented; a 0.22222 fraction
    is held out as the validation subset.  Classes are weighted with
    scikit-learn's 'balanced' scheme.

    Side effects:
        * ModelCheckpoint writes the best model (by ``val_accuracy``) to
          ``checkpoint_path``.
        * The final weights and the full model are saved as .h5 files in the
          current working directory.

    Returns:
        The fitted Sequential model, as it stands after the last epoch.
    """

    ########################################################################################
    #First, define the model structure

    #Loads Mobilenet layers, without the top layer, and freezes them so their weights are kept.
    #Freezing through the object avoids depending on the auto-generated layer name.
    base_network = MobileNetV2(include_top = False, input_shape = (224,224,3))
    base_network.trainable = False

    model = Sequential()
    model.add(base_network)
    model.add(Flatten())   #Flatten the mobilenet output, then add a couple dense layers which we train on the cherry data
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(3))    #Must equal the number of class folders under the train directory
    model.add(Activation("softmax"))  #Softmax for multiclass classification

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',   # I hear this one works well
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #batches of 32 images (resized to 224x224 by target_size below) seems to work well with my gpu. About twice as fast as running on cpu
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (we seperate testing prior to anything)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryDiseaseOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    # NOTE(review): this reuses the augmenting datagen, so validation images are
    # augmented as well -- confirm that this is intended.
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryDiseaseOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Get's class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator

    #classes/y are keyword-only in current scikit-learn; passing them positionally raises a TypeError
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight = 'balanced',
        classes = class_labels,
        y = train_generator.classes)

    #Key each weight by its actual class index instead of assuming labels are exactly 0..k-1
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################33
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOther" #path for checkpoints

    callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1), #Reduces the learning rate if no improvements are made.  Hopefully make for a more accurate model
                 ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch") #Saves the model when it performes best
                ]

    #Step counts are cast to int; np.ceil alone hands Keras a numpy float
    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks

        )

    #Saves it
    model.save_weights('CherryDiseaseOther_1_Weights.h5')
    model.save('CherryDiseaseOther_1_Model.h5')

    return model

CherryDiseaseOtherMod = CherryDiseaseOther_create()

In [None]:
#Evaluate on the test set
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOther')

#Test images are only rescaled, not augmented
testdata = ImageDataGenerator(rescale=1./255)

#shuffle is off so the prediction order lines up with testgen.classes below
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\CherryDiseaseOther\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#Call evaluate on the loaded model itself rather than through the unbound Model method
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are already imported in the imports cell
Y_pred = testmodel.predict(testgen,
                          workers = 32,
                          max_queue_size = 64)
y_pred = np.argmax(Y_pred, axis=1)   #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())
print(classification_report(testgen.classes, y_pred, target_names=target_names))

### Type 1, Other-Exclusive

In [None]:
def AppleDiseaseOtherNoex_create():
    """Build, train, and save the Type-1 'other'-exclusive apple sub-network.

    A frozen MobileNetV2 (without its top layer) feeds a small dense head that
    ends in a 5-way softmax.  Images are read from the AppleDiseaseOtherNoex
    ``train`` directory, resized to 224x224 and augmented; a 0.22222 fraction
    is held out as the validation subset.  Classes are weighted with
    scikit-learn's 'balanced' scheme.

    Side effects:
        * ModelCheckpoint writes the best model (by ``val_accuracy``) to
          ``checkpoint_path``.
        * The final weights and the full model are saved as .h5 files in the
          current working directory.

    Returns:
        The fitted Sequential model, as it stands after the last epoch.
    """

    ########################################################################################
    #First, define the model structure

    #Loads Mobilenet layers, without the top layer, and freezes them so their weights are kept.
    #Freezing through the object avoids depending on the auto-generated layer name.
    base_network = MobileNetV2(include_top = False, input_shape = (224,224,3))
    base_network.trainable = False

    model = Sequential()
    model.add(base_network)
    model.add(Flatten())   #Flatten the mobilenet output, then add a couple dense layers which we train on the apple data
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(5))    #Must equal the number of class folders under the train directory
    model.add(Activation("softmax"))  #Softmax for multiclass classification

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',   # I hear this one works well
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #batches of 32 images (resized to 224x224 by target_size below) seems to work well with my gpu. About twice as fast as running on cpu
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (we seperate testing prior to anything)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleDiseaseOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    # NOTE(review): this reuses the augmenting datagen, so validation images are
    # augmented as well -- confirm that this is intended.
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleDiseaseOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Get's class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator

    #classes/y are keyword-only in current scikit-learn; passing them positionally raises a TypeError
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight = 'balanced',
        classes = class_labels,
        y = train_generator.classes)

    #Key each weight by its actual class index instead of assuming labels are exactly 0..k-1
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################33
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOtherNoex" #path for checkpoints

    callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1), #Reduces the learning rate if no improvements are made.  Hopefully make for a more accurate model
                 ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch") #Saves the model when it performes best
                ]

    #Step counts are cast to int; np.ceil alone hands Keras a numpy float
    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks

        )

    #Saves it
    model.save_weights('AppleDiseaseOtherNoex_1_Weights.h5')
    model.save('AppleDiseaseOtherNoex_1_Model.h5')

    return model

AppleDiseaseOtherNoexMod1 = AppleDiseaseOtherNoex_create()

In [None]:
#Evaluate on the test set
# NOTE(review): this loads "...\AppleDiseaseOtherNoex1", while AppleDiseaseOtherNoex_create
# checkpoints to "...\AppleDiseaseOtherNoex" -- confirm which directory holds the
# model that should be evaluated.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOtherNoex1')

#Test images are only rescaled, not augmented
testdata = ImageDataGenerator(rescale=1./255)

#shuffle is off so the prediction order lines up with testgen.classes below
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\AppleDiseaseOtherNoex\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#Call evaluate on the loaded model itself rather than through the unbound Model method
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are already imported in the imports cell
Y_pred = testmodel.predict(testgen,
                          workers = 32,
                          max_queue_size = 64)
y_pred = np.argmax(Y_pred, axis=1)   #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def PeachDiseaseOtherNoex_create():
    """Build, train, and save the Type-1 'other'-exclusive peach sub-network.

    A frozen MobileNetV2 (without its top layer) feeds a small dense head that
    ends in a 3-way softmax.  Images are read from the PeachDiseaseOtherNoex
    ``train`` directory, resized to 224x224 and augmented; a 0.22222 fraction
    is held out as the validation subset.  Classes are weighted with
    scikit-learn's 'balanced' scheme.

    Side effects:
        * ModelCheckpoint writes the best model (by ``val_accuracy``) to
          ``checkpoint_path``.
        * The final weights and the full model are saved as .h5 files in the
          current working directory.

    Returns:
        The fitted Sequential model, as it stands after the last epoch.
    """

    ########################################################################################
    #First, define the model structure

    #Loads Mobilenet layers, without the top layer, and freezes them so their weights are kept.
    #Freezing through the object avoids depending on the auto-generated layer name.
    base_network = MobileNetV2(include_top = False, input_shape = (224,224,3))
    base_network.trainable = False

    model = Sequential()
    model.add(base_network)
    model.add(Flatten())   #Flatten the mobilenet output, then add a couple dense layers which we train on the peach data
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(3))    #Must equal the number of class folders under the train directory
    model.add(Activation("softmax"))  #Softmax for multiclass classification

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',   # I hear this one works well
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #batches of 32 images (resized to 224x224 by target_size below) seems to work well with my gpu. About twice as fast as running on cpu
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (we seperate testing prior to anything)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachDiseaseOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    # NOTE(review): this reuses the augmenting datagen, so validation images are
    # augmented as well -- confirm that this is intended.
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachDiseaseOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Get's class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator

    #classes/y are keyword-only in current scikit-learn; passing them positionally raises a TypeError
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight = 'balanced',
        classes = class_labels,
        y = train_generator.classes)

    #Key each weight by its actual class index instead of assuming labels are exactly 0..k-1
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################33
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOtherNoex" #path for checkpoints

    callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1), #Reduces the learning rate if no improvements are made.  Hopefully make for a more accurate model
                 ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch") #Saves the model when it performes best
                ]

    #Step counts are cast to int; np.ceil alone hands Keras a numpy float
    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks

        )

    #Saves it
    model.save_weights('PeachDiseaseOtherNoex_1_Weights.h5')
    model.save('PeachDiseaseOtherNoex_1_Model.h5')

    return model

PeachDiseaseOtherNoexMod1 = PeachDiseaseOtherNoex_create()

In [None]:
#Evaluate on the test set
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOtherNoex')

#Test images are only rescaled, not augmented
testdata = ImageDataGenerator(rescale=1./255)

#shuffle is off so the prediction order lines up with testgen.classes below
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\PeachDiseaseOtherNoex\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#Call evaluate on the loaded model itself rather than through the unbound Model method
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are already imported in the imports cell
Y_pred = testmodel.predict(testgen,
                          workers = 32,
                          max_queue_size = 64)
y_pred = np.argmax(Y_pred, axis=1)   #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def CherryDiseaseOtherNoex_create():
    """Build, train and save the CherryDiseaseOtherNoex classifier (model 1).

    Architecture: MobileNetV2 base (no top, frozen) -> Flatten -> Dense(256) ->
    ReLU -> BatchNormalization -> Dropout(0.5) -> Dense(3) -> softmax.

    Images are read from the CherryDiseaseOtherNoex ``train`` folder; one
    augmenting generator provides both the "training" and the "validation"
    subset. Classes are re-weighted with scikit-learn's "balanced" scheme.

    Side effects:
        * ModelCheckpoint writes the best-``val_accuracy`` model to
          ``checkpoint_path``.
        * After the last epoch the weights and the full model are saved as
          ``CherryDiseaseOtherNoex_1_Weights.h5`` and
          ``CherryDiseaseOtherNoex_1_Model.h5`` (relative paths).

    Returns:
        The fitted ``Sequential`` model as it stands after the final epoch.
    """
    ########################################################################################
    #First, define the model structure

    model = Sequential()
    model.add(MobileNetV2(include_top = False, input_shape = (224,224,3)))  #MobileNetV2 layers, without the top layer

    model.get_layer("mobilenetv2_1.00_224").trainable = False  #Freeze the MobileNetV2 base; only the head below is trained
    model.add(Flatten())   #Flatten the base output, then add a small dense head trained on this dataset
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(3))
    model.add(Activation("softmax"))  #Softmax for multiclass classification

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #Batch size of 32 worked well on the author's GPU (images are resized to 224x224 below)
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): Keras documents MobileNetV2 inputs as scaled to [-1, 1]; rescale=1/255 gives [0, 1].
    #Left unchanged so training and the test cell keep identical preprocessing - confirm before changing.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (the test set is separated beforehand)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryDiseaseOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    #NOTE(review): it shares datagen, so validation images are augmented as well
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryDiseaseOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator
    #compute_class_weight takes `classes` and `y` as keyword-only arguments in current scikit-learn
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=class_labels,
        y=train_generator.classes)

    #Key each weight by its actual class index instead of its position in the array
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOtherNoex" #path for checkpoints

    callbacks = [
        #Halves the learning rate when val_loss has not improved by min_delta for 2 epochs
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1),
        #Saves the model only on epochs where val_accuracy improves
        ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch"),
    ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #whole number of batches per pass
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks
        )

    #Saves the final-epoch weights and model
    model.save_weights('CherryDiseaseOtherNoex_1_Weights.h5')
    model.save('CherryDiseaseOtherNoex_1_Model.h5')

    return model

#Running this cell trains the model, writes the checkpoint and .h5 files, and keeps the fitted model
CherryDiseaseOtherNoexMod1 = CherryDiseaseOtherNoex_create()

In [None]:
#Evaluate on the test set
#Loads the best-val_accuracy checkpoint written by CherryDiseaseOtherNoex_create
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOtherNoex')

#Test images are only rescaled (no augmentation), using the same 1/255 factor as training
testdata = ImageDataGenerator(rescale=1./255)

#shuffle=False keeps the prediction order aligned with testgen.classes, which the report below relies on
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\CherryDiseaseOtherNoex\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#metrics[0] is reported as the loss and metrics[1] as the accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are imported in the notebook's top import cell
Y_pred = testmodel.predict(testgen,
                          workers = 32,
                          max_queue_size = 64)
y_pred = np.argmax(Y_pred, axis=1)  #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())  #plain list of class folder names
print(classification_report(testgen.classes, y_pred, target_names=target_names))

### Type-2, Other Inclusive

In [None]:
def AppleOther_create():
    """Build, train and save the AppleOther classifier.

    Per the original note, this model would filter images before a disease
    classifier.

    Architecture: MobileNetV2 base (no top, frozen) -> Flatten -> Dense(256) ->
    ReLU -> BatchNormalization -> Dropout(0.5) -> Dense(2) -> softmax.

    Images are read from the AppleOther ``train`` folder; one augmenting
    generator provides both the "training" and the "validation" subset.
    Classes are re-weighted with scikit-learn's "balanced" scheme.

    Side effects:
        * ModelCheckpoint writes the best-``val_accuracy`` model to
          ``checkpoint_path``.
        * After the last epoch the weights and the full model are saved as
          ``AppleOther_Weights.h5`` and ``AppleOther_Model.h5`` (relative paths).

    Returns:
        The fitted ``Sequential`` model as it stands after the final epoch.
    """
    ########################################################################################
    #First, define the model structure

    model = Sequential()
    model.add(MobileNetV2(include_top = False, input_shape = (224,224,3)))  #MobileNetV2 layers, without the top layer

    model.get_layer("mobilenetv2_1.00_224").trainable = False  #Freeze the MobileNetV2 base; only the head below is trained
    model.add(Flatten())   #Flatten the base output, then add a small dense head trained on this dataset
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(2))
    model.add(Activation("softmax"))  #Softmax over the two classes

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #Batch size of 32 worked well on the author's GPU (images are resized to 224x224 below)
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): Keras documents MobileNetV2 inputs as scaled to [-1, 1]; rescale=1/255 gives [0, 1].
    #Left unchanged so training and the test cell keep identical preprocessing - confirm before changing.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (the test set is separated beforehand)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    #NOTE(review): it shares datagen, so validation images are augmented as well
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator
    #compute_class_weight takes `classes` and `y` as keyword-only arguments in current scikit-learn
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=class_labels,
        y=train_generator.classes)

    #Key each weight by its actual class index instead of its position in the array
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleOther" #path for checkpoints

    callbacks = [
        #Halves the learning rate when val_loss has not improved by min_delta for 2 epochs
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1),
        #Saves the model only on epochs where val_accuracy improves
        ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch"),
    ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #whole number of batches per pass
        epochs=35,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks
        )

    #Saves the final-epoch weights and model
    model.save_weights('AppleOther_Weights.h5')
    model.save('AppleOther_Model.h5')

    return model

#Running this cell trains the model, writes the checkpoint and .h5 files, and keeps the fitted model
AppleOtherMod = AppleOther_create()

In [None]:
#Evaluate on the test set
#NOTE(review): AppleOther_create checkpoints to ...\Model_Checkpoints\AppleOther, but this loads "AppleOther2";
#presumably a manually renamed/copied checkpoint - confirm it exists before running.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleOther2')

#Test images are only rescaled (no augmentation), using the same 1/255 factor as training
testdata = ImageDataGenerator(rescale=1./255)

#shuffle=False keeps the prediction order aligned with testgen.classes, which the report below relies on
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\AppleOther\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#metrics[0] is reported as the loss and metrics[1] as the accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are imported in the notebook's top import cell
Y_pred = testmodel.predict(testgen)
y_pred = np.argmax(Y_pred, axis=1)  #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())  #plain list of class folder names
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def PeachesOther_create():
    """Build, train and save the PeachesOther classifier.

    Per the original note, this model would filter images before a disease
    classifier.

    Architecture: MobileNetV2 base (no top, frozen) -> Flatten -> Dense(64) ->
    ReLU -> Dense(2) -> softmax. This head has no BatchNormalization or Dropout.

    Images are read from the PeachesOther ``train`` folder; one augmenting
    generator provides both the "training" and the "validation" subset.
    Classes are re-weighted with scikit-learn's "balanced" scheme.

    Side effects:
        * ModelCheckpoint writes the best-``val_accuracy`` model to
          ``checkpoint_path``.
        * After the last epoch the weights and the full model are saved as
          ``PeachesOther_Weights.h5`` and ``PeachesOther_Model.h5`` (relative paths).

    Returns:
        The fitted ``Sequential`` model as it stands after the final epoch.
    """
    ########################################################################################
    #First, define the model structure

    model = Sequential()
    model.add(MobileNetV2(include_top = False, input_shape = (224,224,3)))  #MobileNetV2 layers, without the top layer

    model.get_layer("mobilenetv2_1.00_224").trainable = False  #Freeze the MobileNetV2 base; only the head below is trained
    model.add(Flatten())   #Flatten the base output, then add a small dense head trained on this dataset
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(2))
    model.add(Activation("softmax"))  #Softmax over the two classes

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #Batch size of 32 worked well on the author's GPU (images are resized to 224x224 below)
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): Keras documents MobileNetV2 inputs as scaled to [-1, 1]; rescale=1/255 gives [0, 1].
    #Left unchanged so training and the test cell keep identical preprocessing - confirm before changing.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (the test set is separated beforehand)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachesOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    #NOTE(review): it shares datagen, so validation images are augmented as well
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachesOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator
    #compute_class_weight takes `classes` and `y` as keyword-only arguments in current scikit-learn
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=class_labels,
        y=train_generator.classes)

    #Key each weight by its actual class index instead of its position in the array
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachesOther" #path for checkpoints

    callbacks = [
        #Halves the learning rate when val_loss has not improved by min_delta for 2 epochs
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0005, verbose = 1),
        #Saves the model only on epochs where val_accuracy improves
        ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch"),
    ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #whole number of batches per pass
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks
        )

    #Saves the final-epoch weights and model
    model.save_weights('PeachesOther_Weights.h5')
    model.save('PeachesOther_Model.h5')

    return model

#Running this cell trains the model, writes the checkpoint and .h5 files, and keeps the fitted model
PeachesOtherMod = PeachesOther_create()

In [None]:
#Evaluate on the test set
#Loads the best-val_accuracy checkpoint written by PeachesOther_create
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachesOther')

#Test images are only rescaled (no augmentation), using the same 1/255 factor as training
testdata = ImageDataGenerator(rescale=1./255)

#shuffle=False keeps the prediction order aligned with testgen.classes, which the report below relies on
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\PeachesOther\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#metrics[0] is reported as the loss and metrics[1] as the accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are imported in the notebook's top import cell
Y_pred = testmodel.predict(testgen)
y_pred = np.argmax(Y_pred, axis=1)  #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())  #plain list of class folder names
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def CherryOther_create():
    """Build, train and save the CherryOther classifier.

    Per the original note, this model would filter images before a disease
    classifier.

    Architecture: MobileNetV2 base (no top, frozen) -> Flatten -> Dense(256) ->
    ReLU -> BatchNormalization -> Dropout(0.5) -> Dense(2) -> softmax.

    Images are read from the CherryOther ``train`` folder; one augmenting
    generator provides both the "training" and the "validation" subset.
    Classes are re-weighted with scikit-learn's "balanced" scheme.

    Side effects:
        * ModelCheckpoint writes the best-``val_accuracy`` model to
          ``checkpoint_path``.
        * After the last epoch the weights and the full model are saved as
          ``CherryOther_Weights.h5`` and ``CherryOther_Model.h5`` (relative paths).

    Returns:
        The fitted ``Sequential`` model as it stands after the final epoch.
    """
    ########################################################################################
    #First, define the model structure

    model = Sequential()
    model.add(MobileNetV2(include_top = False, input_shape = (224,224,3)))  #MobileNetV2 layers, without the top layer

    model.get_layer("mobilenetv2_1.00_224").trainable = False  #Freeze the MobileNetV2 base; only the head below is trained
    model.add(Flatten())   #Flatten the base output, then add a small dense head trained on this dataset
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(2))
    model.add(Activation("softmax"))  #Softmax over the two classes

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #Batch size of 32 worked well on the author's GPU (images are resized to 224x224 below)
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): Keras documents MobileNetV2 inputs as scaled to [-1, 1]; rescale=1/255 gives [0, 1].
    #Left unchanged so training and the test cell keep identical preprocessing - confirm before changing.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (the test set is separated beforehand)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    #NOTE(review): it shares datagen, so validation images are augmented as well
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryOther\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator
    #compute_class_weight takes `classes` and `y` as keyword-only arguments in current scikit-learn
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=class_labels,
        y=train_generator.classes)

    #Key each weight by its actual class index instead of its position in the array
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryOther" #path for checkpoints

    callbacks = [
        #Halves the learning rate when val_loss has not improved by min_delta for 2 epochs
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1),
        #Saves the model only on epochs where val_accuracy improves
        ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch"),
    ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #whole number of batches per pass
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks
        )

    #Saves the final-epoch weights and model
    model.save_weights('CherryOther_Weights.h5')
    model.save('CherryOther_Model.h5')

    return model

#Running this cell trains the model, writes the checkpoint and .h5 files, and keeps the fitted model
CherryOtherMod1 = CherryOther_create()

In [None]:
#Evaluate on the test set
#NOTE(review): unlike the other evaluation cells this loads CherryOther_Model.h5 rather than the
#Model_Checkpoints\CherryOther directory; CherryOther_create saves that file after the final epoch,
#so this is presumably the last-epoch model, not the best-val_accuracy checkpoint - confirm this is intended.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\CherryOther_Model.h5')

#Test images are only rescaled (no augmentation), using the same 1/255 factor as training
testdata = ImageDataGenerator(rescale=1./255)

#shuffle=False keeps the prediction order aligned with testgen.classes, which the report below relies on
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\CherryOther\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#metrics[0] is reported as the loss and metrics[1] as the accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are imported in the notebook's top import cell
Y_pred = testmodel.predict(testgen,
                          max_queue_size = 64,
                          workers = 32)

y_pred = np.argmax(Y_pred, axis=1)  #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())  #plain list of class folder names
print(classification_report(testgen.classes, y_pred, target_names=target_names))

### Type-2, Other Exclusive

In [None]:
def AppleOtherNoex_create():
    """Build, train and save the AppleOtherNoex classifier.

    Per the original note, this model would filter images before a disease
    classifier.

    Architecture: MobileNetV2 base (no top, frozen) -> Flatten -> Dense(256) ->
    ReLU -> BatchNormalization -> Dropout(0.5) -> Dense(2) -> softmax.

    Images are read from the AppleOtherNoex ``train`` folder; one augmenting
    generator provides both the "training" and the "validation" subset.
    Classes are re-weighted with scikit-learn's "balanced" scheme.

    Side effects:
        * ModelCheckpoint writes the best-``val_accuracy`` model to
          ``checkpoint_path``.
        * After the last epoch the weights and the full model are saved as
          ``AppleOtherNoex_Weights.h5`` and ``AppleOtherNoex_Model.h5``
          (relative paths). The model file was previously written under the
          misspelled name ``AppleOthreNoex_Model.h5``.

    Returns:
        The fitted ``Sequential`` model as it stands after the final epoch.
    """
    ########################################################################################
    #First, define the model structure

    model = Sequential()
    model.add(MobileNetV2(include_top = False, input_shape = (224,224,3)))  #MobileNetV2 layers, without the top layer

    model.get_layer("mobilenetv2_1.00_224").trainable = False  #Freeze the MobileNetV2 base; only the head below is trained
    model.add(Flatten())   #Flatten the base output, then add a small dense head trained on this dataset
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(2))
    model.add(Activation("softmax"))  #Softmax over the two classes

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #Batch size of 32 worked well on the author's GPU (images are resized to 224x224 below)
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): Keras documents MobileNetV2 inputs as scaled to [-1, 1]; rescale=1/255 gives [0, 1].
    #Left unchanged so training and the test cell keep identical preprocessing - confirm before changing.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (the test set is separated beforehand)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    #NOTE(review): it shares datagen, so validation images are augmented as well
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\AppleOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator
    #compute_class_weight takes `classes` and `y` as keyword-only arguments in current scikit-learn
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=class_labels,
        y=train_generator.classes)

    #Key each weight by its actual class index instead of its position in the array
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleOtherNoex" #path for checkpoints

    callbacks = [
        #Halves the learning rate when val_loss has not improved by min_delta for 2 epochs
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1),
        #Saves the model only on epochs where val_accuracy improves
        ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch"),
    ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #whole number of batches per pass
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks
        )

    #Saves the final-epoch weights and model
    model.save_weights('AppleOtherNoex_Weights.h5')
    model.save('AppleOtherNoex_Model.h5')  #file name spelling corrected to match the weights file

    return model

#Running this cell trains the model, writes the checkpoint and .h5 files, and keeps the fitted model
AppleOtherNoex1 = AppleOtherNoex_create()

In [None]:
#Evaluate on the test set
#NOTE(review): AppleOtherNoex_create checkpoints to ...\Model_Checkpoints\AppleOtherNoex, but this loads
#"AppleOtherNoex1"; presumably a manually renamed/copied checkpoint - confirm it exists before running.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleOtherNoex1')

#Test images are only rescaled (no augmentation), using the same 1/255 factor as training
testdata = ImageDataGenerator(rescale=1./255)

#shuffle=False keeps the prediction order aligned with testgen.classes, which the report below relies on
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\AppleOtherNoex\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#metrics[0] is reported as the loss and metrics[1] as the accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are imported in the notebook's top import cell
Y_pred = testmodel.predict(testgen)
y_pred = np.argmax(Y_pred, axis=1)  #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())  #plain list of class folder names
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def PeachOtherNoex_create():
    """Build, train and save the PeachOtherNoex classifier.

    Per the original note, this model would filter images before a disease
    classifier.

    Architecture: MobileNetV2 base (no top, frozen) -> Flatten -> Dense(256) ->
    ReLU -> BatchNormalization -> Dropout(0.5) -> Dense(2) -> softmax.

    Images are read from the PeachOtherNoex ``train`` folder; one augmenting
    generator provides both the "training" and the "validation" subset.
    Classes are re-weighted with scikit-learn's "balanced" scheme.

    Side effects:
        * ModelCheckpoint writes the best-``val_accuracy`` model to
          ``checkpoint_path``.
        * After the last epoch the weights and the full model are saved as
          ``PeachOtherNoex_Weights.h5`` and ``PeachOtherNoex_Model.h5``
          (relative paths).

    Returns:
        The fitted ``Sequential`` model as it stands after the final epoch.
    """
    ########################################################################################
    #First, define the model structure

    model = Sequential()
    model.add(MobileNetV2(include_top = False, input_shape = (224,224,3)))  #MobileNetV2 layers, without the top layer

    model.get_layer("mobilenetv2_1.00_224").trainable = False  #Freeze the MobileNetV2 base; only the head below is trained
    model.add(Flatten())   #Flatten the base output, then add a small dense head trained on this dataset
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(2))
    model.add(Activation("softmax"))  #Softmax over the two classes

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #Batch size of 32 worked well on the author's GPU (images are resized to 224x224 below)
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): Keras documents MobileNetV2 inputs as scaled to [-1, 1]; rescale=1/255 gives [0, 1].
    #Left unchanged so training and the test cell keep identical preprocessing - confirm before changing.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (the test set is separated beforehand)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    #NOTE(review): it shares datagen, so validation images are augmented as well
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\PeachOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator
    #compute_class_weight takes `classes` and `y` as keyword-only arguments in current scikit-learn
    class_labels = np.unique(train_generator.classes)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=class_labels,
        y=train_generator.classes)

    #Key each weight by its actual class index instead of its position in the array
    train_class_weights = {int(label): float(weight)
                           for label, weight in zip(class_labels, class_weights)}


    ################################################################
    #This fits the model
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachOtherNoex" #path for checkpoints

    callbacks = [
        #Halves the learning rate when val_loss has not improved by min_delta for 2 epochs
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1),
        #Saves the model only on epochs where val_accuracy improves
        ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch"),
    ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #whole number of batches per pass
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks
        )

    #Saves the final-epoch weights and model
    model.save_weights('PeachOtherNoex_Weights.h5')
    model.save('PeachOtherNoex_Model.h5')

    return model

#Running this cell trains the model, writes the checkpoint and .h5 files, and keeps the fitted model
PeachOtherNoex1 = PeachOtherNoex_create()

In [None]:
#Evaluate on the test set
#NOTE(review): PeachOtherNoex_create checkpoints to ...\Model_Checkpoints\PeachOtherNoex, but this loads
#"PeachOtherNoex1"; presumably a manually renamed/copied checkpoint - confirm it exists before running.
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachOtherNoex1')

#Test images are only rescaled (no augmentation), using the same 1/255 factor as training
testdata = ImageDataGenerator(rescale=1./255)

#shuffle=False keeps the prediction order aligned with testgen.classes, which the report below relies on
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\PeachOtherNoex\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#metrics[0] is reported as the loss and metrics[1] as the accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
#classification_report and confusion_matrix are imported in the notebook's top import cell
Y_pred = testmodel.predict(testgen)
y_pred = np.argmax(Y_pred, axis=1)  #index of the highest softmax output per image
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())  #plain list of class folder names
print(classification_report(testgen.classes, y_pred, target_names=target_names))

In [None]:
def CherryOtherNoex_create():
    """Build, train and save the Cherry-vs-other classifier (Cherry Other Noex 1).

    This model is meant to filter images before a disease classifier.
    A frozen MobileNetV2 base (without its top layer) feeds a small dense
    head ending in a 2-way softmax. Training images are read from the
    CherryOtherNoex train directory with on-the-fly augmentation, and
    `validation_split` holds part of them out for validation. Classes are
    re-weighted with scikit-learn's 'balanced' scheme.

    Side effects:
        * ModelCheckpoint writes the best model (by val_accuracy) to
          `checkpoint_path`.
        * 'CherryOtherNoex_Weights.h5' and 'CherryOtherNoex_Model.h5' are
          written to the current working directory after training.

    Returns:
        The fitted Sequential model as it stands after the last epoch
        (not necessarily the checkpointed best epoch).
    """

    ########################################################################################
    #First, define the model structure

    #Keep a handle on the base network so it can be frozen directly rather than
    #looked up by its auto-generated layer name ("mobilenetv2_1.00_224").
    base_network = MobileNetV2(include_top = False, input_shape = (224,224,3))  #Loads Mobilenet layers, without the top layer
    base_network.trainable = False  #We'll keep the weights from mobilenetV2

    model = Sequential()
    model.add(base_network)
    model.add(Flatten())   #Flatten the MobileNet output, then add a couple of dense layers which we train on the cherry/other data
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(2))
    model.add(Activation("softmax"))  #Two outputs: one per class folder

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',   # I hear this one works well
                  metrics=['accuracy'])


    #######################################################################################
    #Took some code from link below
    #https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

    #batches of 32 images @ 224^2 pixels seems to work well with my gpu. About twice as fast as running on cpu
    batchsize = 32

    #Specifies the range of augmentation for all images (preprocessing)
    #NOTE(review): the validation subset below is drawn from this same generator,
    #so validation images are augmented as well - confirm that this is intended.
    datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.1,
        brightness_range = (0.6,1.3),
        horizontal_flip=True,
        fill_mode='reflect',
        validation_split=0.22222) #this ends up being about 20% of the entire dataset (we separate testing prior to anything)

    #Sets the training data generator
    train_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "training")


    #Sets the validation data generator
    val_generator = datagen.flow_from_directory(
        directory = r"C:\Datasets\Plant Village\CherryOtherNoex\train",
        target_size = (224,224),
        batch_size = batchsize,
        class_mode = "categorical",
        subset = "validation")


    #Gets class weights
    #https://stackoverflow.com/questions/41648129/balancing-an-imbalanced-dataset-with-keras-image-generator

    #`classes` and `y` are keyword-only in current scikit-learn; keywords also work on older releases
    class_weights = class_weight.compute_class_weight(
            class_weight = 'balanced',
            classes = np.unique(train_generator.classes),
            y = train_generator.classes)

    train_class_weights = dict(enumerate(class_weights))



    ################################################################
    #This fits the model
    #NOTE(review): checkpoints are written to "...\CherryOtherNoex" while the evaluation
    #cells load "...\CherryOtherNoex1" - confirm the directory is renamed between runs.
    checkpoint_path = r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryOtherNoex" #path for checkpoints

    callbacks = [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0, min_delta = 0.0002, verbose = 1), #Reduces the learning rate if no improvements are made.  Hopefully make for a more accurate model
                 ModelCheckpoint(filepath = checkpoint_path, monitor = "val_accuracy", verbose = 1, save_best_only = True, mode = "auto", save_freq = "epoch") #Saves the model when it performs best
                ]

    model.fit(
        train_generator,
        steps_per_epoch=int(np.ceil(train_generator.samples / batchsize)),  #step counts must be whole numbers
        epochs=25,
        validation_data=val_generator,
        validation_steps=int(np.ceil(val_generator.samples / batchsize)),
        class_weight=train_class_weights,
        workers = 32,
        #use_multiprocessing = True,
        max_queue_size = 64,
        callbacks = callbacks

        )

    #Saves it
    model.save_weights('CherryOtherNoex_Weights.h5')
    model.save('CherryOtherNoex_Model.h5')

    return model

#Runs the training function above (it fits the model, writes the .h5 weights/model files) and keeps the returned model
CherryOtherNoex1 = CherryOtherNoex_create()

In [None]:
#Evaluate the Cherry-vs-other checkpoint on its held-out test set
testmodel = keras.models.load_model(r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryOtherNoex1')

#Test images only get the 1/255 rescale (no augmentation)
testdata = ImageDataGenerator(rescale=1./255)

#Fixes: the call was never closed (SyntaxError) and it pointed at the Peach test
#directory. shuffle must stay off so the prediction order lines up with
#testgen.classes below.
testgen = testdata.flow_from_directory(
    directory = r"C:\Datasets\Plant Village\CherryOtherNoex\test",
    target_size = (224,224),
    batch_size = 32,
    class_mode = "categorical",
    shuffle = False)

#The first two entries of the evaluate() result are reported as loss and accuracy
metrics = testmodel.evaluate(testgen)

print("Loss: " + str(round(metrics[0],4)), "Accuracy: " + str(round(metrics[1],4)))

#https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045
from sklearn.metrics import classification_report, confusion_matrix
Y_pred = testmodel.predict(testgen)   #one row of class scores per image
y_pred = np.argmax(Y_pred, axis=1)    #index of the highest-scoring class
print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = list(testgen.class_indices.keys())
print(classification_report(testgen.classes, y_pred, target_names=target_names))

## Sub-Network Consolidation Functions

Below are the functions that take the sub-networks generated previously and combine them into full models that can handle multiple types of plants.  There are two functions: one for Type-1 models and one for Type-2 models.

### Model Type-1 

In [None]:
def model_type_1(mod_paths, test_path, plants, class_nums, batch_size = 8, eval_mode = False):
    """Combine per-plant "disease + other" models into one multi-plant classifier.

    Every model scores every test image. Each model's output columns are
    named `<plant>0 .. <plant>{n-1}` followed by `not<plant>`, in that order.
    NOTE(review): this assumes the "other" class is the LAST output of every
    model - confirm against the training class indices.

    For each image the plant whose model gives the lowest `not<plant>` score
    is selected, and the disease is the highest-scoring class of that plant's
    model.

    Args:
        mod_paths: paths of the saved models, one per plant.
        test_path: directory passed to `image_dataset_from_directory`.
        plants: plant names, in the same order as `mod_paths`.
        class_nums: number of disease classes per plant (excluding "other"),
            in the same order as `mod_paths`.
        batch_size: batch size used when reading the test images.
        eval_mode: when True, return the raw score columns of all models
            instead of the selected labels.

    Returns:
        eval_mode False: DataFrame with columns "plant" (str), "disease"
        (class index within the plant, as a string) and "absolute_label"
        (class index offset by the class counts of the preceding plants, float).
        eval_mode True: DataFrame of all model scores; the columns of later
        models come first.

    Raises:
        ValueError: if `mod_paths`, `plants` and `class_nums` differ in length.
    """
    if not (len(mod_paths) == len(plants) == len(class_nums)):
        raise ValueError("mod_paths, plants and class_nums must all have the same length")

    #######################
    #Load in the data and process it

    dataset = tf.keras.preprocessing.image_dataset_from_directory(test_path,
                                                                 image_size = (224, 224),
                                                                 shuffle = False,
                                                                 label_mode = "categorical",
                                                                 batch_size = batch_size)
    #Rescaling (because we didn't build it into the models)
    #https://github.com/tensorflow/tensorflow/issues/39595
    rescale = Rescaling(scale=1.0/255)
    dataset = dataset.map(lambda image,label:(rescale(image),label))


    ##################################################################################
    #Load the models, make predictions, save them together

    frames = []
    for mod_path, plant, n_classes in zip(mod_paths, plants, class_nums):

        testmodel = keras.models.load_model(mod_path)

        preds = testmodel.predict(dataset,
                         workers = 32,
                         max_queue_size = 32,
                         verbose = 1)

        cols = [plant + str(k) for k in range(n_classes)] + ["not" + plant]
        frames.append(pd.DataFrame(preds, columns = cols))

    #Later models first: this is the column order the previous repeated join produced
    compiled_preds = pd.concat(frames[::-1], axis = 1) if frames else pd.DataFrame()

    if eval_mode:
        #Raw scores requested; no need to derive labels
        return compiled_preds


    ###########################################################################
    #Separates out the best predictions for the most likely plant

    #Least likely 'other' among all models -> most likely plant.
    #Columns are addressed by exact name and listed in the frame's own order,
    #so ties resolve the same way as before.
    not_cols = ["not" + plant for plant in reversed(plants)]
    plant_labels = compiled_preds[not_cols].idxmin(axis = 1).str[len("not"):]

    n_rows = len(compiled_preds)
    disease = np.empty(n_rows, dtype = object)
    absolute = np.full(n_rows, np.nan)

    #Absolute labels follow the order of `plants`; make sure it lines up with the test-data directory
    offset = 0
    for plant, n_classes in zip(plants, class_nums):

        rows = (plant_labels == plant).to_numpy()
        if rows.any():
            disease_cols = [plant + str(k) for k in range(n_classes)]
            best = compiled_preds.loc[rows, disease_cols].to_numpy().argmax(axis = 1)
            disease[rows] = best.astype(str)
            absolute[rows] = best + offset

        offset += n_classes

    return pd.DataFrame({"plant": plant_labels,
                         "disease": disease,
                         "absolute_label": absolute})


In [None]:
#The lists below must stay in the same order as the class folders in the test-data directory

#Paths can either be .h5 or pb
mod_paths = [
    r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOtherNoex1",
    r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOtherNoex1",
    r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOtherNoex1",
]
plants = ["Apple", "Cherry", "Peach"]
class_nums = [4,2,2]

test_path = r"C:\Datasets\Plant Village\ApplePeachCherryTest"

#eval_mode = True returns the compiled per-model prediction columns rather than the selected labels
predictions = model_type_1(mod_paths = mod_paths,
                           test_path = test_path,
                           plants = plants,
                           class_nums = class_nums,
                           eval_mode = True)

### Model Type-2

In [3]:
def model_type_2(layer1List, layer2List, testpath, plants, eval_mode = False, eval_class_nums = [], batch_size = 8):
    """Two-layer classifier: layer 1 picks the plant, layer 2 picks its disease.

    Every layer-1 model scores every image; its two outputs are named
    `<plant>` and `not<plant>`, in that order.
    NOTE(review): this assumes the plant class is the FIRST output of every
    layer-1 model - confirm against the training class indices.
    The plant with the highest `<plant>` score wins. Every layer-2 model is
    then run on ALL images (inefficient, but simple) and the prediction of the
    winning plant's model is kept.

    Notes:
        OOM errors were seen with batch sizes of 32+.

    Args:
        layer1List: paths of the plant-vs-other models, one per plant.
        layer2List: paths of the disease models, in the same order. Not used
            when `eval_mode` is True.
        testpath: directory passed to `image_dataset_from_directory`.
        plants: plant names, in the same order as the model lists.
        eval_mode: when True, return only the layer-1 scores.
        eval_class_nums: number of classes of each layer-2 model, in order.
            When left empty the counts are taken from the width of each
            layer-2 model's output.
        batch_size: batch size used when reading the test images.

    Returns:
        eval_mode False: DataFrame with one column per plant (that plant's
        predicted class, offset by the class counts of the preceding plants),
        "plant_key" (the selected plant) and "absolute" (the offset class of
        the selected plant, float).
        eval_mode True: DataFrame of layer-1 scores; the columns of later
        models come first.

    Raises:
        ValueError: if the model lists, `plants` or a non-empty
            `eval_class_nums` disagree in length.
    """
    if len(layer1List) != len(plants):
        raise ValueError("layer1List and plants must have the same length")

    #######################
    #Load in the data and process it

    dataset = tf.keras.preprocessing.image_dataset_from_directory(testpath,
                                                                 image_size = (224, 224),
                                                                 shuffle = False,
                                                                 label_mode = "categorical",
                                                                 batch_size = batch_size)
    #Rescaling (because we didn't build it into the models)
    #https://github.com/tensorflow/tensorflow/issues/39595
    rescale = Rescaling(scale=1.0/255)
    dataset = dataset.map(lambda image,label:(rescale(image),label))

    ##########################
    #Load the layer 1 models, make predictions, save them

    layer1_frames = []
    for mod_path, plant in zip(layer1List, plants):

        gate_model = keras.models.load_model(mod_path)

        preds = gate_model.predict(dataset,
                         workers = 32,
                         max_queue_size = 32,
                         verbose = 1)

        layer1_frames.append(pd.DataFrame(preds, columns = [plant, "not" + plant]))

    #Later models first: this is the column order the previous repeated join produced
    compiled_preds_layer1 = pd.concat(layer1_frames[::-1], axis = 1) if layer1_frames else pd.DataFrame()

    if eval_mode:
        #Only the layer-1 scores are wanted, so skip the layer-2 models
        return compiled_preds_layer1

    if len(layer2List) != len(plants):
        raise ValueError("layer2List and plants must have the same length")
    if len(eval_class_nums) not in (0, len(plants)):
        raise ValueError("eval_class_nums must be empty or have one entry per plant")

    ###########################
    #Load the layer 2 models, make predictions on all data for each (inefficient)

    best_by_plant = {}
    inferred_class_nums = []
    for mod_path, plant in zip(layer2List, plants):

        disease_model = keras.models.load_model(mod_path)

        preds = disease_model.predict(dataset,
                         workers = 32,
                         max_queue_size = 32,
                         verbose = 1)

        best_by_plant[plant] = np.argmax(preds, axis = 1)
        inferred_class_nums.append(preds.shape[1])  #output width = number of classes

    preds_layer2 = pd.DataFrame(best_by_plant)

    #Shift each plant's class ids past those of the plants before it
    class_counts = list(eval_class_nums) if len(eval_class_nums) else inferred_class_nums
    offset = 0
    for plant, n_classes in zip(plants, class_counts):
        preds_layer2[plant] += offset
        offset += n_classes

    ###############################################
    #Highest plant score across the layer-1 models decides the plant.
    #Columns are addressed by exact name and listed in the frame's own order,
    #so ties resolve the same way as before.
    plant_cols = list(plants)[::-1]
    plant_key = compiled_preds_layer1[plant_cols].idxmax(axis = "columns")
    preds_layer2["plant_key"] = plant_key

    #For every row, take the value from the column named by its plant_key
    per_plant = preds_layer2[list(plants)].to_numpy()
    key_position = pd.Index(plants).get_indexer(plant_key)
    preds_layer2["absolute"] = per_plant[np.arange(len(per_plant)), key_position].astype(float)

    return preds_layer2
    
    
        



## Model Evaluation

Here we'll compile and evaluate the four models.  

### Model Type-1, Exclusive

In [None]:
#The lists below must stay in the same order as the class folders in the test-data directory

#Paths can either be .h5 or pb
mod_paths = [
    r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOtherNoex1",
    r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOtherNoex1",
    r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOtherNoex1",
]
plants = ["Apple", "Cherry", "Peach"]
class_nums = [4,2,2]
test_path = r"C:\Datasets\Plant Village\ApplePeachCherryTest"

predictions = model_type_1(mod_paths, test_path, plants, class_nums, eval_mode = False)

########################
#Performance analytics

#The generator is only used here for the true class ids (testgen.classes) and the class names
testdata = ImageDataGenerator(rescale=1./255)
testgen = testdata.flow_from_directory(
    directory = test_path,
    class_mode = "categorical",
    shuffle = False)

y_pred = predictions["absolute_label"]
target_names = testgen.class_indices.keys()
cm_counts = confusion_matrix(testgen.classes, y_pred)

print('Confusion Matrix')
print(cm_counts)
print('Classification Report')
print(classification_report(testgen.classes, y_pred, target_names=target_names))

#Gets a confidence interval: accuracy +/- z * sqrt(accuracy * (1 - accuracy) / n)
z = 1.96 # 95%
accuracy = accuracy_score(testgen.classes, y_pred)
conf = z * ((accuracy * (1 - accuracy)) / len(y_pred))**0.5
ci_low = accuracy - conf
ci_high = accuracy + conf
print("Accuracy: " + str(round(accuracy,3)))
print("95% CI: " + str(round(ci_low, 3)) + " - " + str(round(ci_high, 3)))

#some of the below plotting code taken from:
#https://stackoverflow.com/questions/35572000/how-can-i-plot-a-confusion-matrix
class_names = list(target_names)
type1_noex_plotdf = pd.DataFrame(cm_counts, index = class_names, columns = class_names)

#Divide every row by its total so each cell is a share of the true class
type1_noex_plotdf = type1_noex_plotdf.div(type1_noex_plotdf.sum(axis=1), axis = 0)

plt.figure(figsize = (15,15))
ax = sns.heatmap(type1_noex_plotdf.round(3), annot=True, annot_kws={"size": 30}, fmt='g', cbar=False)
ax.set_title('Model Type 1 Exclusive-Other Predictions', fontsize = 65, loc = 'right')
ax.set_xlabel("Predicted Class", fontsize = 50)
ax.set_ylabel("True Class", fontsize = 50)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
ax.xaxis.set_ticks_position('bottom')
ax.tick_params(labelsize=30)

plt.savefig(r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Figures\type1_noex_cm.jpeg",
            bbox_inches = "tight")

### Model Type-1, Inclusive

In [None]:
#The lists below must stay in the same order as the class folders in the test-data directory
mod_paths = [r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleDiseaseOther1",
             r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryDiseaseOther1",
             r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachDiseaseOther1"]

test_path = r"C:\Datasets\Plant Village\ApplePeachCherryTest"

plants = ["Apple", "Cherry", "Peach"]

class_nums = [4,2,2]

predictions2 = model_type_1(mod_paths, test_path, plants, class_nums, eval_mode = False)

########################
#Performance analytics

#The generator is only used here for the true class ids (testgen.classes) and the class names
testdata = ImageDataGenerator(rescale=1./255)
testgen = testdata.flow_from_directory(
    directory = test_path,
    class_mode = "categorical",
    shuffle = False)

y_pred = predictions2["absolute_label"]


print('Confusion Matrix')
print(confusion_matrix(testgen.classes, y_pred))
print('Classification Report')
target_names = testgen.class_indices.keys()
print(classification_report(testgen.classes, y_pred, target_names=target_names))

#Gets a confidence interval: accuracy +/- z * sqrt(accuracy * (1 - accuracy) / n)
#n comes from the predictions themselves (it was a hard-coded 777), so the
#interval stays correct if the test set changes.
z = 1.96 # 95%
accuracy = accuracy_score(testgen.classes, y_pred)
conf = z * ((accuracy * (1 - accuracy)) / len(y_pred))**0.5
print("Accuracy: " + str(round(accuracy,3)))
print("95% CI: " + str(round(accuracy  - conf, 3)) + " - " + str(round(accuracy  + conf, 3)))

#some of the below plotting code taken from:
#https://stackoverflow.com/questions/35572000/how-can-i-plot-a-confusion-matrix
target_names = testgen.class_indices.keys()
type1_plotdf = pd.DataFrame(confusion_matrix(testgen.classes, y_pred),
                                 index = list(target_names),
                                 columns = list(target_names))

#Divide every row by its total so each cell is a share of the true class
type1_plotdf = type1_plotdf.div(list(type1_plotdf.sum(axis=1)), axis = 0)

plt.figure(figsize = (15,15))
ax = sns.heatmap(type1_plotdf.round(3), annot=True, annot_kws={"size": 30}, fmt='g', cbar=False)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel("Predicted Class", fontsize = 50)
ax.set_ylabel("True Class", fontsize = 50)
ax.set_title('Model Type 1 Inclusive-Other Predictions', fontsize = 65, loc = 'right')
ax.tick_params(labelsize=30)

plt.savefig(r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Figures\type1_cm.jpeg",
            bbox_inches = "tight")

### Model Type-2, Exclusive

In [None]:
#Evaluate


#####
#Paths, plants, and class nums need to be ordered the same as the test set classes are arranged in their directory (probably alphabetically)
#Otherwise the confusion matrix below would have to be changed
#####

#Need to provide plant type in the same order as the models
#Could probably make something to infer this automatically
plants = ["Apple", "Cherry", "Peach"]

#The number of classes each layer 2 model can predict
eval_class_nums = [4,2,2]

#The dataset
test_path = r"C:\Datasets\Plant Village\ApplePeachCherryTest"

#Specify layer 1 paths (need to keep same ordering throughout)
mod_paths_1 = [
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleOtherNoex1',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryOtherNoex1',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachOtherNoex1',
]

#Specify layer 2 paths
mod_paths_2 = [
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\Apple1',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Cherry_1_Model.h5',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Peach_1_Model.h5',
]

#Runs the models
prediction_aggregate = model_type_2(mod_paths_1, mod_paths_2, test_path, plants, eval_mode = False, eval_class_nums = eval_class_nums)

########################
#Performance analytics

#The generator is only used here for the true class ids (testgen.classes) and the class names
testdata = ImageDataGenerator(rescale=1./255)
testgen = testdata.flow_from_directory(
    directory = test_path,
    class_mode = "categorical",
    shuffle = False)

y_pred = prediction_aggregate["absolute"]
target_names = testgen.class_indices.keys()
cm_counts = confusion_matrix(testgen.classes, y_pred)

print('Confusion Matrix')
print(cm_counts)
print('Classification Report')
print(classification_report(testgen.classes, y_pred, target_names=target_names))

#Gets a confidence interval: accuracy +/- z * sqrt(accuracy * (1 - accuracy) / n)
z = 1.96 # 95%
accuracy = accuracy_score(testgen.classes, y_pred)
conf = z * ((accuracy * (1 - accuracy)) / len(y_pred))**0.5
ci_low = accuracy - conf
ci_high = accuracy + conf
print("Accuracy: " + str(round(accuracy,3)))
print("95% CI: " + str(round(ci_low, 3)) + " - " + str(round(ci_high, 3)))

#some of the below plotting code taken from:
#https://stackoverflow.com/questions/35572000/how-can-i-plot-a-confusion-matrix
class_names = list(target_names)
type2_plotnoexdf = pd.DataFrame(cm_counts, index = class_names, columns = class_names)

#Divide every row by its total so each cell is a share of the true class
type2_plotnoexdf = type2_plotnoexdf.div(type2_plotnoexdf.sum(axis=1), axis = 0)

plt.figure(figsize = (15,15))
ax = sns.heatmap(type2_plotnoexdf.round(3), annot=True, annot_kws={"size": 30}, fmt='g', cbar=False)
ax.set_title('Model Type 2 Exclusive-Other Predictions', fontsize = 65, loc = 'right')
ax.set_xlabel("Predicted Class", fontsize = 50)
ax.set_ylabel("True Class", fontsize = 50)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
ax.xaxis.set_ticks_position('bottom')
ax.tick_params(labelsize=30)

plt.savefig(r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Figures\type2_noex_cm.jpeg",
            bbox_inches = "tight")

### Model Type-2, Inclusive

In [None]:
#Evaluate


#####
#Paths, plants, and class nums need to be ordered the same as the test set classes are arranged in their directory (probably alphabetically)
#Otherwise the confusion matrix below would have to be changed
#####

#Need to provide plant type in the same order as the models
#Could probably make something to infer this automatically
plants = ["Apple", "Cherry", "Peach"]

#The number of classes each layer 2 model can predict
eval_class_nums = [4,2,2]

#The dataset
test_path = r"C:\Datasets\Plant Village\ApplePeachCherryTest"

#Specify layer 1 paths (need to keep same ordering throughout)
mod_paths_1 = [
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\AppleOther2',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\CherryOther1',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\PeachesOther1',
]

#Specify layer 2 paths
mod_paths_2 = [
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Model_Checkpoints\Apple1',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Cherry_1_Model.h5',
    r'C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Python\Peach_1_Model.h5',
]

#Runs the models
prediction_aggregate = model_type_2(mod_paths_1, mod_paths_2, test_path, plants, eval_mode = False, eval_class_nums = eval_class_nums)

########################
#Performance analytics

#The generator is only used here for the true class ids (testgen.classes) and the class names
testdata = ImageDataGenerator(rescale=1./255)
testgen = testdata.flow_from_directory(
    directory = test_path,
    class_mode = "categorical",
    shuffle = False)

y_pred = prediction_aggregate["absolute"]
target_names = testgen.class_indices.keys()
cm_counts = confusion_matrix(testgen.classes, y_pred)

print('Confusion Matrix')
print(cm_counts)
print('Classification Report')
print(classification_report(testgen.classes, y_pred, target_names=target_names))

#Gets a confidence interval: accuracy +/- z * sqrt(accuracy * (1 - accuracy) / n)
z = 1.96 # 95%
accuracy = accuracy_score(testgen.classes, y_pred)
conf = z * ((accuracy * (1 - accuracy)) / len(y_pred))**0.5
ci_low = accuracy - conf
ci_high = accuracy + conf
print("Accuracy: " + str(round(accuracy,3)))
print("95% CI: " + str(round(ci_low, 3)) + " - " + str(round(ci_high, 3)))

#some of the below plotting code taken from:
#https://stackoverflow.com/questions/35572000/how-can-i-plot-a-confusion-matrix
class_names = list(target_names)
type2_plotdf = pd.DataFrame(cm_counts, index = class_names, columns = class_names)

#Divide every row by its total so each cell is a share of the true class
type2_plotdf = type2_plotdf.div(type2_plotdf.sum(axis=1), axis = 0)

plt.figure(figsize = (15,15))
ax = sns.heatmap(type2_plotdf.round(3), annot=True, annot_kws={"size": 30}, fmt='g', cbar=False)
ax.set_title('Model Type 2 Inclusive-Other Predictions', fontsize = 65, loc = 'right')
ax.set_xlabel("Predicted Class", fontsize = 50)
ax.set_ylabel("True Class", fontsize = 50)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 90)
ax.xaxis.set_ticks_position('bottom')
ax.tick_params(labelsize=30)

plt.savefig(r"C:\Users\blume\OneDrive\Desktop\CUNY MSDS\Data 698 Masters Thesis\Figures\type2_cm.jpeg",
            bbox_inches = "tight")