In [1]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from skimage.feature import hog
import pandas as pd

# Directory containing the image data
# NOTE(review): machine-specific absolute path; point it at the local copy of
# the dataset before running on another machine.
data_dir = r'D:\Code\py_code\Pattern-Recognition\Better-Sirekap\data'
# os.listdir() returns entries in arbitrary order. Sorting fixes the sample
# order so the seeded train/test splits are reproducible across machines.
img_list = sorted(os.listdir(data_dir))
# Full path of every entry, in the same order as img_list.
img_files = [os.path.join(data_dir, img) for img in img_list]

# Preprocess and crop the image to the target size
def preprocess_and_crop_image(image_path, target_size=(48, 48)):
    """Load an image, crop it to its largest foreground contour and resize it.

    The image is converted to grayscale, blurred with a 5x5 Gaussian kernel and
    binarised with an inverted Otsu threshold. The bounding box of the largest
    external contour (by area) is cropped out of the grayscale image; when no
    contour is found the whole grayscale image is used instead.

    Args:
        image_path: Path of the image file to load.
        target_size: Output size handed to ``cv2.resize`` as ``dsize``.

    Returns:
        The single-channel grayscale image resized to ``target_size``.

    Raises:
        OSError: If OpenCV cannot read ``image_path`` (``cv2.imread`` returned
            ``None``, e.g. missing file or undecodable content).
    """
    # cv2.imread takes an IMREAD_* flag, not a colour-conversion code.
    # IMREAD_COLOR always yields an 8-bit, 3-channel BGR array, so the
    # BGR -> gray conversion below is valid even for grayscale source files.
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # THRESH_BINARY_INV: pixels at or below the Otsu threshold become the
    # white foreground that findContours traces.
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Fall back to the full frame when nothing was segmented.
    region = gray
    if contours:
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        region = gray[y:y+h, x:x+w]

    return cv2.resize(region, target_size, interpolation=cv2.INTER_AREA)

# Extract HOG features from an image
def extract_hog_features(image):
    """Compute the HOG descriptor of an image.

    Args:
        image: Image array. A 3-dimensional array is treated as BGR and
            converted to grayscale first; anything else is passed through.

    Returns:
        The feature vector produced by ``skimage.feature.hog`` with
        9 orientations, 8x8-pixel cells and 2x2-cell blocks.
    """
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # `visualize` is left at its default (False): the rendered HOG image was
    # never used, so building it for every sample was wasted work. The
    # returned feature vector is the same either way.
    return hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2))

# Extract zoning features from an image
def extract_zoning_features(image, zones=(4, 4)):
    """Describe a 2-D image by the mean value of each cell of a regular grid.

    The image is divided into ``zones[0]`` rows by ``zones[1]`` columns of
    cells whose size is obtained by integer division, so any leftover rows or
    columns at the bottom/right edge are not included in any cell.

    Args:
        image: 2-D array (its shape is unpacked as ``height, width``).
        zones: ``(rows, columns)`` of the grid.

    Returns:
        1-D array of cell means in row-major order.
    """
    height, width = image.shape
    cell_h = height // zones[0]
    cell_w = width // zones[1]
    zone_means = [
        np.mean(image[r * cell_h:(r + 1) * cell_h, c * cell_w:(c + 1) * cell_w])
        for r in range(zones[0])
        for c in range(zones[1])
    ]
    return np.array(zone_means)

# Extract PCA features from an image
def extract_pca_features(image, pca):
    """Project a single image through ``pca`` and return the result as 1-D.

    Args:
        image: Image array. A 3-dimensional array is treated as BGR and
            converted to grayscale before flattening.
        pca: Object exposing ``transform`` (e.g. a fitted PCA) that accepts a
            ``(1, n_pixels)`` array.

    Returns:
        The flattened output of ``pca.transform`` for this one sample.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
    # transform() expects a 2-D batch, so present the image as a single row.
    sample = gray.flatten().reshape(1, -1)
    return pca.transform(sample).flatten()

# Extract combined features (HOG, Zoning, PCA) from an image
def extract_combined_features(image_path, pca):
    """Build the concatenated HOG + zoning + PCA feature vector for one file.

    Args:
        image_path: Path of the image; it is preprocessed once and the result
            is shared by all three extractors.
        pca: Projection object forwarded to ``extract_pca_features``.

    Returns:
        1-D array: HOG features, then zoning features, then PCA features.
    """
    processed = preprocess_and_crop_image(image_path)
    feature_parts = (
        extract_hog_features(processed),
        extract_zoning_features(processed),
        extract_pca_features(processed, pca),
    )
    return np.concatenate(feature_parts)

# Label extraction from image filenames
data = img_files
labels = [int(img[0]) for img in img_list]

# Different test splits
test_splits = [0.2, 0.3, 0.4]

# Feature extraction combinations
feature_extraction_combinations = [
    ("HOG", lambda img: extract_hog_features(preprocess_and_crop_image(img))),
    ("HOG + Zoning + PCA", lambda img: extract_combined_features(img, pca)),
    ("Zoning", lambda img: extract_zoning_features(preprocess_and_crop_image(img))),
    ("PCA", lambda img: extract_pca_features(preprocess_and_crop_image(img), pca)),
    ("HOG + Zoning", lambda img: np.concatenate([extract_hog_features(preprocess_and_crop_image(img)), extract_zoning_features(preprocess_and_crop_image(img))])),
    ("HOG + PCA", lambda img: np.concatenate([extract_hog_features(preprocess_and_crop_image(img)), extract_pca_features(preprocess_and_crop_image(img), pca)])),
    ("Zoning + PCA", lambda img: np.concatenate([extract_zoning_features(preprocess_and_crop_image(img)), extract_pca_features(preprocess_and_crop_image(img), pca)])),
    ("Raw Pixels", lambda img: preprocess_and_crop_image(img).flatten()),
]

# Classifier configurations
classifiers = [
    ("SVM", SVC(kernel='linear', random_state=42, gamma=0.001, C=10, probability=True)),
    ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=42))
]

# Experiment loop
# For every test split, feature combination and classifier: fit on the training
# features, predict on the test features and record the accuracy.
# Each entry appended to `results` is
# (feature name, classifier name, test split, accuracy in percent rounded to 2 dp).
results = []

for test_split in test_splits:
    # Split data into train and test sets
    # (stratified on the labels, with a fixed random_state)
    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=test_split, random_state=42, stratify=labels)
    
    # Prepare PCA on training data
    # `pca` is read by the PCA-based feature lambdas when they are called, so
    # it has to be assigned here before the feature loop below runs.
    all_images = [preprocess_and_crop_image(img) for img in X_train]
    flattened_images = [img.flatten() for img in all_images]
    # At most 10 components, capped by the number of pixels in one image.
    pca = PCA(n_components=min(10, len(flattened_images[0])), random_state=42)
    pca.fit(flattened_images)

    for feature_name, feature_func in feature_extraction_combinations:
        # feature_func receives an image path and returns one feature vector.
        X_train_features = np.array([feature_func(img) for img in X_train])
        X_test_features = np.array([feature_func(img) for img in X_test])

        # Scale the features
        # The scaler is fitted on the training features only and then applied
        # unchanged to the test features.
        scaler = StandardScaler()
        X_train_features = scaler.fit_transform(X_train_features)
        X_test_features = scaler.transform(X_test_features)

        # The same classifier objects from `classifiers` are fitted again for
        # every split / feature combination.
        for clf_name, clf in classifiers:
            clf.fit(X_train_features, y_train)
            y_pred = clf.predict(X_test_features)
            
            accuracy = accuracy_score(y_test, y_pred)
            results.append((feature_name, clf_name, test_split, round(accuracy*100, 2)))
            print(f"Test Split: {test_split}, Feature: {feature_name}, Classifier: {clf_name}, Accuracy: {accuracy * 100:.2f}%")

Test Split: 0.2, Feature: HOG, Classifier: SVM, Accuracy: 98.34%
Test Split: 0.2, Feature: HOG, Classifier: Random Forest, Accuracy: 97.02%
Test Split: 0.2, Feature: HOG + Zoning + PCA, Classifier: SVM, Accuracy: 98.28%
Test Split: 0.2, Feature: HOG + Zoning + PCA, Classifier: Random Forest, Accuracy: 97.08%
Test Split: 0.2, Feature: Zoning, Classifier: SVM, Accuracy: 77.14%
Test Split: 0.2, Feature: Zoning, Classifier: Random Forest, Accuracy: 71.31%
Test Split: 0.2, Feature: PCA, Classifier: SVM, Accuracy: 60.37%
Test Split: 0.2, Feature: PCA, Classifier: Random Forest, Accuracy: 63.02%
Test Split: 0.2, Feature: HOG + Zoning, Classifier: SVM, Accuracy: 98.21%
Test Split: 0.2, Feature: HOG + Zoning, Classifier: Random Forest, Accuracy: 96.69%
Test Split: 0.2, Feature: HOG + PCA, Classifier: SVM, Accuracy: 98.28%
Test Split: 0.2, Feature: HOG + PCA, Classifier: Random Forest, Accuracy: 96.95%
Test Split: 0.2, Feature: Zoning + PCA, Classifier: SVM, Accuracy: 83.90%
Test Split: 0.2, Fea

In [2]:
# Summarise the experiment results: one row per (feature extraction,
# classifier) pair and one accuracy column per test split.
df_results = pd.DataFrame(
    data=results,
    columns=['Feature Extraction', 'Classifier', 'Test Split', 'Accuracy'],
)
df_pivot = (
    df_results
    .pivot_table(
        values='Accuracy',
        index=['Feature Extraction', 'Classifier'],
        columns='Test Split',
    )
    .reset_index()
    # Drop the 'Test Split' label from the column axis so the header row is clean.
    .rename_axis(None, axis='columns')
)
df_pivot

Unnamed: 0,Feature Extraction,Classifier,0.2,0.3,0.4
0,HOG,Random Forest,97.02,96.73,96.49
1,HOG,SVM,98.34,98.28,97.91
2,HOG + PCA,Random Forest,96.95,96.82,96.42
3,HOG + PCA,SVM,98.28,98.23,97.98
4,HOG + Zoning,Random Forest,96.69,96.6,96.49
5,HOG + Zoning,SVM,98.21,98.23,97.95
6,HOG + Zoning + PCA,Random Forest,97.08,96.95,96.49
7,HOG + Zoning + PCA,SVM,98.28,98.19,97.95
8,PCA,Random Forest,63.02,56.39,53.78
9,PCA,SVM,60.37,54.71,52.78
