In [1]:
import os
import cv2
import yaml
import numpy as np
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# HOG & Random Forest

# Load data
# Single place to change the dataset location; every path below derives from it.
DATASET_ROOT = "archive"

with open(os.path.join(DATASET_ROOT, "data.yaml"), encoding="utf-8") as f:
    data = yaml.safe_load(f)

class_names = data['names']
# Some dataset YAML files give `names` as an {id: name} mapping instead of a
# list; convert it to a list ordered by id so class_names[class_id] and
# target_names=class_names behave the same for both layouts.
if isinstance(class_names, dict):
    class_names = [class_names[class_id] for class_id in sorted(class_names)]

# Fall back to the number of names when the file carries no `nc` entry.
num_classes = data.get('nc', len(class_names))

train_img_path = os.path.join(DATASET_ROOT, "train/images")
val_img_path = os.path.join(DATASET_ROOT, "valid/images")

train_label_path = os.path.join(DATASET_ROOT, "train/labels")
val_label_path = os.path.join(DATASET_ROOT, "valid/labels")

# Read image and label
def load_images_and_labels(img_folder, label_folder):
    """Load resized grayscale images together with their class ids.

    Every ``.txt`` file in ``label_folder`` is paired with the ``.jpg`` file
    of the same base name in ``img_folder``. Label files without a matching,
    readable image are skipped. Each non-blank line of a label file yields
    one sample: the whole image (converted to grayscale and resized to
    128x128) plus the integer given by the first whitespace-separated token
    of that line. An image whose label file has several lines therefore
    appears several times, once per line.

    Args:
        img_folder: Directory containing the ``.jpg`` images.
        label_folder: Directory containing the ``.txt`` label files.

    Returns:
        A tuple ``(X, y)`` of parallel lists: ``X`` holds the images and
        ``y`` the matching integer class ids.

    Raises:
        ValueError: If the first token of a label line is not an integer.
    """
    label_suffix = ".txt"
    X = []
    y = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and so the seeded model trained on it) reproducible.
    for label_file in sorted(os.listdir(label_folder)):
        if not label_file.endswith(label_suffix):
            continue
        # Swap only the trailing extension: str.replace() would also rewrite
        # any ".txt" that occurs earlier in the file name.
        stem = label_file[:-len(label_suffix)]
        img_path = os.path.join(img_folder, stem + ".jpg")
        if not os.path.exists(img_path):
            continue

        # Read image; cv2.imread signals an undecodable file by returning None
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, (128, 128))

        # Read label file; an empty file simply contributes no samples
        label_path = os.path.join(label_folder, label_file)
        with open(label_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                class_id = int(line.split()[0])

                # One sample per annotation line
                X.append(img)
                y.append(class_id)
    return X, y


# Load the training and validation sets from their separate folders
X_train_imgs, y_train = load_images_and_labels(
    img_folder=train_img_path,
    label_folder=train_label_path,
)
X_val_imgs, y_val = load_images_and_labels(
    img_folder=val_img_path,
    label_folder=val_label_path,
)

# Report how many samples each set contributed
n_train_samples, n_val_samples = len(X_train_imgs), len(X_val_imgs)
print(f"Training samples: {n_train_samples}, Validation samples: {n_val_samples}")

# Extract HOG Features 
def extract_hog_features(image_list):
    """Compute a HOG descriptor for every image and collect them in an array.

    Each image is passed to ``hog`` with 16x16-pixel cells, 2x2-cell blocks
    and 9 orientation bins; no visualisation image is requested.

    Args:
        image_list: Iterable of images to describe.

    Returns:
        A NumPy array built from the per-image descriptors, in input order.
        An empty input produces an empty one-dimensional array.
    """
    hog_settings = {
        "pixels_per_cell": (16, 16),
        "cells_per_block": (2, 2),
        "orientations": 9,
        "visualize": False,
    }
    return np.array([hog(image, **hog_settings) for image in image_list])

X_train = extract_hog_features(X_train_imgs)
X_val = extract_hog_features(X_val_imgs)

# An empty image list produces a 1-D array, which would otherwise surface as a
# bare IndexError on shape[1]; fail early with a message that names the cause.
if X_train.ndim != 2 or X_val.ndim != 2:
    raise ValueError(
        "No training or validation samples were loaded; check the dataset paths."
    )

print(f"HOG feature vector size: {X_train.shape[1]}")

# Train Random Forest model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_val)

# Pass the full id list explicitly: without `labels`, the report infers the
# classes from y_val/y_pred and raises when that count differs from
# len(target_names), e.g. when a class never occurs in the validation data.
label_ids = list(range(len(class_names)))
print(classification_report(y_val, y_pred, labels=label_ids, target_names=class_names))

# Use the same ids and names on the confusion-matrix axes instead of bare ids
ConfusionMatrixDisplay.from_predictions(
    y_val, y_pred, labels=label_ids, display_labels=class_names
)
# Explicit show so the figure also appears when this runs outside a notebook
plt.show()


# -----------------------------
# Testing
# -----------------------------

# test_img_path = os.path.join("archive", "test/images")
# test_label_path = os.path.join("archive", "test/labels")

# X_test_imgs, y_test = load_images_and_labels(test_img_path, test_label_path)

# X_test = extract_hog_features(X_test_imgs)

# y_test_pred = clf.predict(X_test)

# print("----- Test Set Results -----")
# print(classification_report(y_test, y_test_pred, target_names=class_names))
# ConfusionMatrixDisplay.from_predictions(y_test, y_test_pred)

# single_img_path = os.path.join("archive", "test/images", "ants-17-_jpg.rf.366ce3d542821626b2926e3142d1bb64.jpg")
# img = cv2.imread(single_img_path)
# img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# img_resized = cv2.resize(img_gray, (128, 128))
# features = hog(img_resized, pixels_per_cell=(16,16), cells_per_block=(2,2), orientations=9, visualize=False)
# features = features.reshape(1, -1)

# pred_class_id = clf.predict(features)[0]
# pred_class_name = class_names[pred_class_id]

# # Show image with predicted label
# plt.imshow(img_resized, cmap='gray')
# plt.title(f"Predicted: {pred_class_name}")
# plt.axis('off')
# plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'archive/data.yaml'