In [1]:
import cv2
import numpy as np
from skimage.feature import hog
import os
import csv
import re

def extract_hog_features(image_path):
    """Extracts HOG features from an image."""
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (60, 60))
    fd, _ = hog(image, orientations=9, pixels_per_cell=(8, 8),
                cells_per_block=(2, 2), visualize=True)
    return fd

def extract_color_features(image_path, bins=16):
    """Extracts HSV color histograms from an image."""
    image = cv2.imread(image_path)
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h_hist = cv2.calcHist([hsv_image], [0], None, [bins], [0, 180])
    s_hist = cv2.calcHist([hsv_image], [1], None, [bins], [0, 256])
    v_hist = cv2.calcHist([hsv_image], [2], None, [bins], [0, 256])
    h_hist = cv2.normalize(h_hist, h_hist).flatten()
    s_hist = cv2.normalize(s_hist, s_hist).flatten()
    v_hist = cv2.normalize(v_hist, v_hist).flatten()
    return np.concatenate((h_hist, s_hist, v_hist))

def load_label(labels_dir, subfolder, row, col):
    """Loads label from a CSV file."""
    label_path = os.path.join(labels_dir, subfolder, "bw_board", f"r{row}_c{col}.csv")
    try:
        with open(label_path, "r") as file:
            reader = csv.reader(file)
            next(reader)
            return next(reader)[0]  # "white", "black", or "none"
    except FileNotFoundError:
        print(f"Label not found: {label_path}")
        return "none"

def parse_row_col(filename):
    """Parses row and column from filename."""
    match = re.match(r"r(\d+)_c(\d+)", filename)
    if match:
        return int(match.group(1)), int(match.group(2))
    else:
        raise ValueError(f"Filename format error: {filename}")

# Directories
squares_dir = "chess_images/squares/"
labels_dir = "chess_images/labels/"
output_file = "chess_images/prepared_data/hog_hsv_svm_data.npy"

# Prepare dataset
features = []
labels = []

for subfolder in os.listdir(squares_dir):
    subfolder_path = os.path.join(squares_dir, subfolder)
    if os.path.isdir(subfolder_path):
        for board_folder in os.listdir(subfolder_path):
            board_path = os.path.join(subfolder_path, board_folder)
            if os.path.isdir(board_path):
                for square_image in os.listdir(board_path):
                    if square_image.endswith(".jpg"):
                        image_path = os.path.join(board_path, square_image)
                        
                        try:
                            row, col = parse_row_col(square_image)
                        except ValueError as e:
                            print(e)
                            continue
                        
                        # Extract features
                        hog_features = extract_hog_features(image_path)
                        hsv_features = extract_color_features(image_path)
                        combined_features = np.concatenate((hog_features, hsv_features))
                        
                        features.append(combined_features)
                        labels.append(load_label(labels_dir, subfolder, row, col))

# Convert features and labels into numpy arrays
X = np.array(features)
label_mapping = {"white": 1, "black": -1, "none": 0}
y = np.array([label_mapping[label] for label in labels])

# Save dataset
np.save(output_file, {"features": X, "labels": y})
print(f"Data preparation complete. Saved to {output_file}.")


Data preparation complete. Saved to chess_images/prepared_data/hog_hsv_svm_data.npy.


In [2]:
import numpy as np
import joblib
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Load dataset
data = np.load("chess_images/prepared_data/hog_hsv_svm_data.npy", allow_pickle=True).item()
X, y = data["features"], data["labels"]

# Define models
models = {
    "HOG-HSV SVM": SVC(kernel="linear"),
    "HOG-HSV Random Forest": RandomForestClassifier(n_estimators=100),
}

# Use Stratified K-Fold for balanced validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation and save models
best_model = None
best_score = 0

for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=kfold, scoring="accuracy")
    mean_score = scores.mean()
    print(f"{name}: Mean Accuracy = {mean_score:.4f}, Std Dev = {scores.std():.4f}")

    # Train on full dataset and save model
    model.fit(X, y)
    model_path = f"chess_images/prepared_data/{name.lower().replace(' ', '_')}_model.pkl"
    joblib.dump(model, model_path)
    print(f"Saved {name} model to {model_path}")

    # Track the best model
    if mean_score > best_score:
        best_score = mean_score
        best_model = model

# Save the best model separately
best_model_path = "chess_images/prepared_data/hog_hsv_best_model.pkl"
joblib.dump(best_model, best_model_path)
print(f"Best model saved to {best_model_path}")


HOG-HSV SVM: Mean Accuracy = 0.9999, Std Dev = 0.0002
Saved HOG-HSV SVM model to chess_images/prepared_data/hog-hsv_svm_model.pkl
HOG-HSV Random Forest: Mean Accuracy = 0.9995, Std Dev = 0.0004
Saved HOG-HSV Random Forest model to chess_images/prepared_data/hog-hsv_random_forest_model.pkl
Best model saved to chess_images/prepared_data/hog_hsv_best_model.pkl


In [3]:
import numpy as np
import joblib
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load dataset
data = np.load("chess_images/prepared_data/hog_hsv_svm_data.npy", allow_pickle=True).item()
X, y = data["features"], data["labels"]

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA to retain 95% of variance
pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X_scaled)

# Define models
models = {
    "HOG-HSV PCA SVM": SVC(kernel="linear"),
    "HOG-HSV PCA Random Forest": RandomForestClassifier(n_estimators=100),
}

# Use Stratified K-Fold for balanced validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation and save models
best_model = None
best_score = 0

for name, model in models.items():
    scores = cross_val_score(model, X_pca, y, cv=kfold, scoring="accuracy")
    mean_score = scores.mean()
    print(f"{name}: Mean Accuracy = {mean_score:.4f}, Std Dev = {scores.std():.4f}")

    # Train on full dataset and save model
    model.fit(X_pca, y)
    model_path = f"chess_images/prepared_data/{name.lower().replace(' ', '_')}_model.pkl"
    joblib.dump(model, model_path)
    print(f"Saved {name} model to {model_path}")

    # Track the best model
    if mean_score > best_score:
        best_score = mean_score
        best_model = model

# Save the best model separately
best_model_path = "chess_images/prepared_data/hog_hsv_pca_best_model.pkl"
joblib.dump(best_model, best_model_path)
print(f"Best model saved to {best_model_path}")

HOG-HSV PCA SVM: Mean Accuracy = 0.9974, Std Dev = 0.0005
Saved HOG-HSV PCA SVM model to chess_images/prepared_data/hog-hsv_pca_svm_model.pkl
HOG-HSV PCA Random Forest: Mean Accuracy = 0.9799, Std Dev = 0.0019
Saved HOG-HSV PCA Random Forest model to chess_images/prepared_data/hog-hsv_pca_random_forest_model.pkl
Best model saved to chess_images/prepared_data/hog_hsv_pca_best_model.pkl
