In [2]:
import os 
import numpy as np 
from PIL import Image

In [4]:
data_path = 'image data/'

In [21]:
train_data_path = os.path.join(data_path,"train")

In [7]:
def load_image_dataset(dataset_directory:str,img_height:int = 224,img_width:int = 224,flatten_image:bool = False):
    """
        Load every image found under ``dataset_directory/<class>/<subclass>/``.

        Params:
        dataset_directory: path to your dataset directory,
        img_height = standard height of image
        img_width = standard width of image
        flatten_image = False: images keep the shape (img_height, img_width, 3)
                        True : each image is flattened into a single vector

        Returns:
        (images, labels) as numpy arrays of equal length. Each label is the
        name of the top-level (class) directory the image was found in; the
        subclass directory name is not part of the label.

        Notes:
        - Entries that are not directories (or, at the image level, not
          regular files) are skipped instead of crashing the walk.
        - Every image is converted to RGB so that grayscale / RGBA / palette
          files produce the same array shape as colour images.
        - Directory listings are sorted so the returned order is deterministic.
    """

    # Create empty lists to store the images and their corresponding labels
    images = []
    labels = []

    # Iterate through the subdirectories (each subdirectory represents a class)
    for class_name in sorted(os.listdir(dataset_directory)):
        class_dir = os.path.join(dataset_directory, class_name)
        if not os.path.isdir(class_dir):
            continue  # stray file next to the class folders
        for subclass_name in sorted(os.listdir(class_dir)):
            subclass_dir = os.path.join(class_dir,subclass_name)
            if not os.path.isdir(subclass_dir):
                continue  # stray file next to the subclass folders
            print(subclass_dir)
            for image_name in sorted(os.listdir(subclass_dir)):
                image_path = os.path.join(subclass_dir, image_name)
                if not os.path.isfile(image_path):
                    continue  # nested directory, not an image
                # The context manager closes the underlying file handle as
                # soon as the pixel data has been copied into the array.
                with Image.open(image_path) as image:
                    image = image.convert("RGB")  # uniform 3-channel layout
                    image = image.resize((img_width, img_height))  # Resize the image to a desired size
                    pixels = np.array(image)  # Convert the image to a numpy array
                images.append(pixels)
                labels.append(class_name)

    # Convert the lists to numpy arrays
    if images:
        images = np.array(images)
    else:
        # Keep the image axes even when nothing was found so callers (and the
        # flattening below) still see a well-formed, zero-length array.
        images = np.empty((0, img_height, img_width, 3), dtype=np.uint8)
    labels = np.array(labels, dtype=str)

    if flatten_image: # flattening image i.e converting image into vector
        # Explicit feature count: reshape(n, -1) is ambiguous (and raises)
        # when the array holds zero images.
        images = images.reshape(images.shape[0], int(np.prod(images.shape[1:])))

    return images,labels


In [23]:
# Load the training split with the default 224x224 size; flatten_image=True turns every
# image into one row of 224*224*3 = 150528 values (see the shape printed below).
train_images, train_labels = load_image_dataset(dataset_directory=train_data_path,flatten_image=True)

In [24]:
train_images.shape

(38104, 150528)

In [25]:
train_labels.shape

(38104,)

In [26]:
from sklearn.ensemble import RandomForestClassifier

In [27]:
# Fixed seed: a random forest draws random bootstrap samples and feature subsets,
# so without random_state every run would fit (and later save) a different model.
RFC = RandomForestClassifier(verbose=True, random_state=42)

In [28]:
# NOTE(review): this call fails with the MemoryError shown below: fitting needs a
# float32 array of shape (38104, 150528), i.e. 21.4 GiB.
# TODO: shrink the feature matrix before fitting, e.g. load BOTH the training and the
# validation split with smaller img_height / img_width so their widths still match.
RFC.fit(X=train_images,y=train_labels)

MemoryError: Unable to allocate 21.4 GiB for an array with shape (38104, 150528) and data type float32

##### Saving trained model

In [None]:
import joblib

def save_model(model,fruit:str,filename:str,location:str):
    """
        Serialise ``model`` with joblib to ``<location>/<fruit>/<filename>.jblib``.

        Params:
        model: the object to persist (passed straight to joblib.dump)
        fruit: name of the sub-folder inside ``location`` the file is written to
        filename: file name without extension; ".jblib" is appended
        location: base directory for saved models

        The target directory, including any missing parent directories, is
        created when it does not exist yet.
    """

    location = os.path.join(location,fruit)
    # makedirs also creates a missing parent (e.g. "models/") and, with
    # exist_ok=True, does not fail when the directory is already there.
    os.makedirs(location, exist_ok=True)

    filename = os.path.join(location,filename+".jblib")

    joblib.dump(model,filename=filename)

In [None]:
save_model(RFC,fruit="all",location="models/",filename="RFC")

## Testing Process

##### Loading saved model

In [None]:
RFC = joblib.load('models/all/RFC.jblib') # Loaded the saved model

In [10]:
test_data_path = os.path.join(data_path,"validation")

In [11]:
# Load the validation split with the same settings as the training split so both
# matrices have the same number of columns (150528, see the shape printed below).
test_images,test_labels =  load_image_dataset(dataset_directory=test_data_path,flatten_image=True)

image data/validation\apple\apple scab
image data/validation\apple\black rot
image data/validation\apple\cedar apple rust
image data/validation\apple\healthy
image data/validation\Cassava\Bacterial Blight (CBB)
image data/validation\Cassava\Brown Streak Disease (CBSD)
image data/validation\Cassava\Green Mottle (CGM)
image data/validation\Cassava\Healthy
image data/validation\Cassava\Mosaic Disease (CMD)
image data/validation\cherry (including sour)\healthy
image data/validation\cherry (including sour)\powdery mildew
image data/validation\corn (maize)\cercospora leaf spot gray leaf spot
image data/validation\corn (maize)\common rust
image data/validation\corn (maize)\healthy
image data/validation\corn (maize)\northern leaf blight
image data/validation\grape\black rot
image data/validation\grape\esca (black measles)
image data/validation\grape\healthy
image data/validation\grape\leaf blight (isariopsis leaf spot)
image data/validation\orange\haunglongbing (citrus greening)
image data/val

In [12]:
test_images.shape

(9458, 150528)

In [None]:
prediction = RFC.predict(test_images)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
confusion_matrix(y_true=test_labels,y_pred=prediction)