In [1]:
import pandas as pd
import numpy as np

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz

import os
from os import listdir
import gdown
from zipfile import ZipFile
import pickle
import cv2
from os import listdir

In [2]:
from tensorflow.keras.utils import img_to_array
def convert_image_to_array(image_dir):
    try:
        image = cv2.imread(image_dir)
        if image is not None :
            img_gan = cv2.resize(image, (32,32))
            return img_to_array(img_gan)
        else :
            return np.array([])
    except Exception as e:
        print(f"Error : {e}")
        return None

In [3]:
def load_dataset(directory_root):
#     directory_root = 'DCGAN_plant'
    image_list_GAN, label_list = [], []
    try:
        print("[INFO] Loading images ...")
        root_dir = listdir(directory_root)
        #print(root_dir)
        for plant_folder in root_dir :
            # remove .DS_Store from list
            if plant_folder == ".DS_Store" :
                root_dir.remove(directory)


        for plant_folder in root_dir :
    #         print("on plant folder")
            plant_image_list = listdir(f"{directory_root}/{plant_folder}")



            for single_image in plant_image_list :

    #             print("in plant folder")
                if single_image == ".DS_Store" :
                    plant_image_list.remove(single_image)

            for image in plant_image_list[:1000]:
    #             print("getting img path")
                image_directory = f"{directory_root}/{plant_folder}/{image}"
                if image_directory.endswith(".jpg") == True or image_directory.endswith(".JPG") == True or image_directory.endswith(".png") == True or image_directory.endswith(".PNG") == True:
    #                 print("checking if img correct format")
                    gan_img = convert_image_to_array(image_directory)
                    image_list_GAN.append(gan_img)
                    label_list.append(plant_folder)

        print("[INFO] Image loading completed")  
        return image_list_GAN, label_list
    except Exception as e:
        print(f"Error : {e}")

In [8]:
# enhanced_image_list , enhanced_label_list = extract_plant_imgs(enhanced_data)
orignal_img_list, orignal_label_list = load_dataset('Orignal_plant')

[INFO] Loading images ...
[INFO] Image loading completed


In [9]:
GAN_image_list, GAN_label_list = load_dataset('DCGAN_plant')

[INFO] Loading images ...
[INFO] Image loading completed


In [10]:
normalized_image_list_GAN = np.array(GAN_image_list, dtype = np.float16) / 255.0
normalized_image_list_orignal = np.array(orignal_img_list, dtype = np.float16) / 255.0

In [11]:
# from sklearn.preprocessing import LabelBinarizer
# # print(label_list)
# label_gen = LabelBinarizer()
# image_labels = label_gen.fit_transform(label_list)
# pickle.dump(label_gen,open('label_transform.pkl', 'wb'))
# n_classes = len(label_gen.classes_)
# print(n_classes)
# print(image_labels)

In [12]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

label_encoder = LabelEncoder()
image_labels_GAN = label_encoder.fit_transform(GAN_label_list)
n_classes = len(label_encoder.classes_)
print(n_classes)
print(image_labels_GAN)

# Convert the labels to one-hot encoded vectors
image_labels_GAN = to_categorical(image_labels_GAN, num_classes=n_classes)
print(image_labels_GAN[3000])

10
[0 0 0 ... 9 9 9]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


In [13]:
label_encoder = LabelEncoder()
image_labels_orignal = label_encoder.fit_transform(orignal_label_list)
n_classes = len(label_encoder.classes_)
print(n_classes)
print(image_labels_orignal)

# Convert the labels to one-hot encoded vectors
image_labels_orignal = to_categorical(image_labels_orignal, num_classes=n_classes)
print(image_labels_orignal[3000])

10
[0 0 0 ... 9 9 9]
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


In [14]:
from sklearn.model_selection import train_test_split
print("making the training and testing split with 30% of the dataset as testing set")
GAN_x_train, GAN_x_test, GAN_y_train, GAN_y_test = train_test_split(normalized_image_list_GAN, image_labels_GAN, test_size=0.3, shuffle=True,random_state = 42)

orignal_x_train, orignal_x_test, orignal_y_train, orignal_y_test = train_test_split(normalized_image_list_orignal, image_labels_orignal, test_size=0.3, shuffle=True,random_state = 42)


making the training and testing split with 30% of the dataset as testing set


In [21]:
# new_image_list = np.array(image_list, dtype = np.float16) / 255.0
# print(new_image_list.shape)
print(orignal_x_train.shape)
print(orignal_y_train.shape)

(7000, 32, 32, 3)
(7000, 10)


In [16]:
# from sklearn.model_selection import train_test_split
# print("making the training and testing split with 30% of the dataset as testing set")
# x_train, x_test, y_train, y_test = train_test_split(new_image_list, image_labels, test_size=0.1, random_state = 42) 
# print(x_train.shape)

In [17]:
def convert_to_2D(set_shape):
    num_images, height, width, channels = set_shape.shape
    converted_shape = np.reshape(set_shape, (num_images, height * width * channels))
    return converted_shape

def convert_to_1D(set_shape):
    converted_shape = set_shape.flatten()
    return converted_shape

In [18]:
GAN_x_train_2D = convert_to_2D(GAN_x_train)
GAN_x_test_2D = convert_to_2D(GAN_x_test)
GAN_y_train_1D = convert_to_1D(GAN_y_train)
GAN_y_test_1D = convert_to_1D(GAN_y_test)

orignal_x_train_2D = convert_to_2D(orignal_x_train)
orignal_x_test_2D = convert_to_2D(orignal_x_test)
orignal_y_train_1D = convert_to_1D(orignal_y_train)
orignal_y_test_1D = convert_to_1D(orignal_y_test)


In [23]:
print(orignal_x_train_2D.shape)
print(orignal_y_train_1D.shape)

(7000, 3072)
(70000,)


In [19]:
# Create a Random Forest classifier with 100 trees
rfc = RandomForestClassifier(n_estimators=100)
# x_train = x_train.flatten()
# Fit the classifier to the training data
rfc.fit(orignal_x_train_2D, orignal_y_train_1D)


ValueError: Found input variables with inconsistent numbers of samples: [7000, 70000]

In [20]:
# pickle.dump(rfc,open('Enhanced_tomato_DCGAN_rf','wb'))

In [21]:
y_pred = rfc.predict(orignal_x_test_2D)

# Calculate the accuracy of the classifier

y_pred = np.where(y_pred[:len(orignal_x_test_2D)] >= 0.5, 1, 0)

In [22]:
from sklearn.metrics import classification_report, confusion_matrix
# print(confusion_matrix(y_test, y_pred))
# print(classification_report(y_test, y_pred))

# print("Accuracy:", accuracy * 100)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix

precision = precision_score(orignal_y_test_1D, y_pred, average='macro')
recall = recall_score(orignal_y_test_1D, y_pred, average='macro')
f1 = f1_score(orignal_y_test_1D, y_pred, average='macro')
accuracy = accuracy_score(orignal_y_test_1D, y_pred)
# Calculate the confusion matrix.
# cm = confusion_matrix(orignal_y_test_1D, y_pred)

# Print the precision, recall, f1-score, and confusion matrix.
print("Precision:", precision * 100)
print("Recall:", recall * 100)
print("F1-score:", f1 * 100)
print("Accuracy:", accuracy * 100)
# print("Confusion matrix:\n", cm)

Precision: 1.0
Recall: 1.0
F1-score: 1.0
Accuracy: 100.0
Confusion matrix:
 [[102   0]
 [  0  98]]
