In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report
from torchvision import transforms

# Dataset locations: the root folder holds one sub-folder per split
DATASET_PATH = './data/original/'
TRAIN_PATH, TEST_PATH = (
    os.path.join(DATASET_PATH, split) for split in ('Training', 'Testing')
)

# Preprocessing applied to every image: resize to a fixed 224x224 size,
# then convert the PIL image to a tensor
img_transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

# Function to load images and convert to tensors
def load_images(img_dir):
    """Load every image below ``img_dir`` into a flat feature matrix.

    ``img_dir`` is expected to contain one sub-directory per class; the
    name of the sub-directory is used as the label of every image inside
    it. Entries of ``img_dir`` that are not directories are ignored.

    Each image is converted to RGB, run through the module-level
    ``img_transform`` pipeline and flattened to a 1-D vector. Images that
    cannot be read or transformed are reported on stdout and skipped.

    Args:
        img_dir: Path of the directory holding the per-class
            sub-directories.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` has
        one flattened image per row and ``labels`` holds the matching
        class-directory name for each row. Both arrays are empty when no
        image could be loaded.
    """
    img_data = []
    labels = []
    # Sort the listings so the sample order does not depend on the order
    # in which the filesystem happens to return directory entries.
    for cls in sorted(os.listdir(img_dir)):
        cls_path = os.path.join(img_dir, cls)
        if not os.path.isdir(cls_path):
            continue
        for img_name in sorted(os.listdir(cls_path)):
            img_path = os.path.join(cls_path, img_name)
            try:
                # The context manager releases the file handle as soon as
                # the pixels have been read.
                with Image.open(img_path) as img:
                    # Force three channels: grayscale/RGBA/palette files
                    # would otherwise give vectors of a different length
                    # and make the stacked feature array ragged.
                    tensor = img_transform(img.convert('RGB'))
                features = np.array(tensor).flatten()
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not
                # abort loading the rest of the dataset.
                print(f"Error loading image {img_path}: {e}")
            else:
                img_data.append(features)
                labels.append(cls)
    return np.array(img_data), np.array(labels)

from sklearn.preprocessing import LabelEncoder

# Read the training split first, then the testing split
(X_train, y_train), (X_test, y_test) = (
    load_images(TRAIN_PATH),
    load_images(TEST_PATH),
)

# Map class names to integer codes; the mapping is learned from the
# training labels only and then applied to both splits
le = LabelEncoder()
le.fit(y_train)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

# Report the size of each feature matrix
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

In [None]:
# Hyper-parameter candidates explored by the exhaustive grid search
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Base estimator; the fixed seed keeps forest construction repeatable
rf = RandomForestClassifier(random_state=42)

# 3-fold cross-validated search over every combination in param_grid
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)

# Run the search on the training split
grid_search.fit(X_train, y_train)

# Keep the best-scoring estimator and show which settings produced it
best_rf = grid_search.best_estimator_
print("Best parameters found: ", grid_search.best_params_)

# Score the selected model on the held-out test split
y_pred = best_rf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))