In [None]:
import cv2
import numpy as np
from skimage.feature import hog
import os
import csv
import re

# Paths
squares_dir = "chess_images/squares/"
labels_dir = "chess_images/labels/"
features_file = "chess_images/prepared_data/lightgbm/features.npy"
labels_file = "chess_images/prepared_data/lightgbm/labels.npy"

def extract_hog_features(image_path):
    """Compute the HOG descriptor of one square image stored on disk.

    The image is read as grayscale and resized to 60x60 before the descriptor
    is computed, so every readable image yields a vector of the same length.

    Args:
        image_path: Path to the image file.

    Returns:
        1-D numpy array holding the HOG feature descriptor.

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # without this check cv2.resize fails with an opaque assertion error.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (60, 60))

    # Only the descriptor is used, so do not ask hog() to also render the
    # visualisation image (visualize=True) just to throw it away.
    return hog(image, orientations=9, pixels_per_cell=(8, 8),
               cells_per_block=(2, 2))

def load_label(labels_dir, subfolder, row, col):
    """Read the label of one board square from its CSV file.

    The file is looked up at
    ``<labels_dir>/<subfolder>/bw_board/r<row>_c<col>.csv``. Its first line is
    treated as a header; the first field of the second line is the label.

    Args:
        labels_dir: Root directory of the label files.
        subfolder: Name of the sub-directory under ``labels_dir``.
        row: Row index used in the file name.
        col: Column index used in the file name.

    Returns:
        The label string ("white", "black", or "none"). Falls back to
        "none", after printing a message, when the file is missing or
        contains no label row.
    """
    label_path = os.path.join(labels_dir, subfolder, "bw_board", f"r{row}_c{col}.csv")
    try:
        # newline="" is how the csv module expects files to be opened.
        with open(label_path, "r", newline="") as file:
            reader = csv.reader(file)
            next(reader, None)  # Skip header
            label_row = next(reader, None)
    except FileNotFoundError:
        print(f"Label not found: {label_path}")
        return "none"

    # A header-only/empty file gives None and a blank line gives []; treat both
    # like a missing label instead of crashing with StopIteration/IndexError.
    if not label_row:
        print(f"Label file has no label row: {label_path}")
        return "none"
    return label_row[0]  # "white", "black", or "none"

def parse_row_col(filename):
    """Extract the board coordinates encoded at the start of a file name.

    Args:
        filename: Name beginning with ``r<row>_c<col>``, e.g. ``r3_c5.jpg``.

    Returns:
        Tuple ``(row, col)`` of ints.

    Raises:
        ValueError: If the name does not start with the expected pattern.
    """
    found = re.match(r"r(\d+)_c(\d+)", filename)
    if found is None:
        raise ValueError(f"Filename format error: {filename}")
    row_text, col_text = found.groups()
    return int(row_text), int(col_text)

# HOG vectors and their labels, appended in lock-step so index i of one list
# always belongs to index i of the other.
features = []
labels = []

# os.listdir returns entries in arbitrary order; sorting at every level makes
# the saved arrays (and any seeded split performed on them later) reproducible.
for subfolder in sorted(os.listdir(squares_dir)):
    subfolder_path = os.path.join(squares_dir, subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    for board_folder in sorted(os.listdir(subfolder_path)):
        board_path = os.path.join(subfolder_path, board_folder)
        if not os.path.isdir(board_path):
            continue
        for square_image in sorted(os.listdir(board_path)):
            if not square_image.endswith(".jpg"):
                continue
            image_path = os.path.join(board_path, square_image)
            try:
                row, col = parse_row_col(square_image)
            except ValueError as e:
                # Skip files whose name does not encode a board position.
                print(e)
                continue

            features.append(extract_hog_features(image_path))
            # NOTE(review): the label lookup uses only `subfolder` (always the
            # "bw_board" label set), so every board_folder under it shares the
            # same labels — confirm this is intended.
            labels.append(load_label(labels_dir, subfolder, row, col))

# np.save does not create missing directories, so make sure they exist first.
for output_file in (features_file, labels_file):
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

# Save features and labels
np.save(features_file, np.array(features))

# Labels are saved as strings; they are encoded to integers at training time.
np.save(labels_file, np.array(labels))

print(f"Saved features to {features_file}")
print(f"Saved labels to {labels_file}")


In [13]:
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load data
X = np.load("chess_images/prepared_data/lightgbm/features.npy")
y = np.load("chess_images/prepared_data/lightgbm/labels.npy")

# Encode labels. LabelEncoder numbers the classes in sorted order, so
# "black", "none", "white" → 0, 1, 2.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Hold out 20% for validation. stratify=y keeps the class proportions the same
# in both parts, so the validation loss is not skewed by an unlucky split.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Prepare datasets for LightGBM
train_data = lgb.Dataset(X_train, label=y_train)
val_data = lgb.Dataset(X_val, label=y_val)

# Train LightGBM
params = {
    "objective": "multiclass",
    # Derived from the data so it cannot drift from the labels actually present.
    "num_class": len(encoder.classes_),
    "metric": "multi_logloss",
    "verbosity": -1,
    # "learning_rate": 0.001,
    # "num_boost_round": 500,
    # "num_leaves": 50,
    # "max_depth": 7,
    # "lambda_l1": 0.1,
    # "lambda_l2": 0.1,
    # "early_stopping_rounds": 10
}

model = lgb.train(params, train_data, valid_sets=[train_data, val_data], num_boost_round=100)

# Save model and label encoder
model.save_model("chess_images/prepared_data/lightgbm_chess_model.txt")

import joblib
joblib.dump(encoder, "chess_images/prepared_data/label_encoder.pkl")

print("Training complete. Model and encoder saved.")


Training complete. Model and encoder saved.


In [26]:
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import joblib
import os

# Load data
X = np.load("chess_images/prepared_data/lightgbm/features.npy")
y = np.load("chess_images/prepared_data/lightgbm/labels.npy")

# Encode labels. LabelEncoder numbers the classes in sorted order, so
# "black", "none", "white" → 0, 1, 2.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Save label encoder (you only need one, it’s the same for all folds)
joblib.dump(encoder, "chess_images/prepared_data/label_encoder.pkl")

# LightGBM parameters
params = {
    "objective": "multiclass",
    # Derived from the data so it cannot drift from the labels actually present.
    "num_class": len(encoder.classes_),
    "metric": "multi_logloss",
    "verbosity": -1,
    # Add or adjust parameters if needed
}

# Set up 5-fold stratified cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

best_model = None
best_loss = float("inf")
best_fold = None
fold_losses = []  # validation loss of every fold, for the summary below

# Training and tracking across folds
for fold, (train_idx, val_idx) in enumerate(kfold.split(X, y), 1):
    print(f"Training on fold {fold}...")

    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    train_data = lgb.Dataset(X_train, label=y_train)
    val_data = lgb.Dataset(X_val, label=y_val)

    # Name the evaluation sets explicitly so the score lookup below does not
    # depend on the position of val_data inside valid_sets ("valid_1").
    model = lgb.train(
        params,
        train_data,
        valid_sets=[train_data, val_data],
        valid_names=["train", "valid"],
        num_boost_round=100,
    )

    # Get validation loss
    loss = model.best_score["valid"]["multi_logloss"]
    fold_losses.append(loss)
    print(f"Fold {fold} multi_logloss: {loss:.4f}")

    # Check if this is the best fold so far
    if loss < best_loss:
        best_loss = loss
        best_model = model
        best_fold = fold

# Save only the best fold's model. This writes to the same path as the
# single-split training cell above and therefore replaces that model.
# NOTE(review): the lowest fold loss may only mean that fold's validation split
# was easier; consider retraining on all data once parameters are settled.
if best_model is not None:
    best_model.save_model("chess_images/prepared_data/lightgbm_chess_model.txt")
    print(f"Best model was from fold {best_fold} with multi_logloss = {best_loss:.4f}")
    print("Best model saved as 'lightgbm_chess_model.txt'")

# The mean over folds is the actual cross-validation estimate.
if fold_losses:
    print(f"Mean multi_logloss across folds: {np.mean(fold_losses):.4f} "
          f"(std {np.std(fold_losses):.4f})")

print("Cross-validation complete.")


Training on fold 1...
Fold 1 multi_logloss: 0.0619
Training on fold 2...
Fold 2 multi_logloss: 0.0632
Training on fold 3...
Fold 3 multi_logloss: 0.0622
Training on fold 4...
Fold 4 multi_logloss: 0.0642
Training on fold 5...
Fold 5 multi_logloss: 0.0634
Best model was from fold 1 with multi_logloss = 0.0619
Best model saved as 'lightgbm_chess_model.txt'
Cross-validation complete.


In [2]:
import cv2
import numpy as np
import pandas as pd
import lightgbm as lgb
from skimage.feature import hog
import joblib

# Restore the artifacts written by the training cells: the LightGBM booster
# and the LabelEncoder that was fitted on the training labels.
model = lgb.Booster(
    model_file='chess_images/prepared_data/lightgbm_chess_model.txt',
)
# joblib.load unpickles the file, so only load encoder files you created yourself.
encoder = joblib.load(
    'chess_images/prepared_data/label_encoder.pkl',
)

def extract_hog_features(image):
    """Compute the HOG descriptor of an in-memory square image.

    NOTE: an earlier cell defines a function with the same name that takes a
    file path; running this cell replaces it in the kernel namespace.

    Args:
        image: Image array of one square; it is resized to 60x60 here.

    Returns:
        Single-row pandas DataFrame whose columns are named
        ``feature_0 … feature_{n-1}`` and hold the HOG descriptor values.
    """
    image = cv2.resize(image, (60, 60))
    # Only the descriptor is used, so do not ask hog() to also render the
    # visualisation image (visualize=True) just to throw it away.
    fd = hog(image, orientations=9, pixels_per_cell=(8, 8),
             cells_per_block=(2, 2))
    feature_names = [f'feature_{i}' for i in range(len(fd))]
    return pd.DataFrame([fd], columns=feature_names)

def _compact_fen_row(symbols):
    """Collapse each run of '1' (empty square) symbols into its length."""
    compact = ''
    empty_run = 0
    for symbol in symbols:
        if symbol == '1':
            empty_run += 1
            continue
        if empty_run > 0:
            compact += str(empty_run)
            empty_run = 0
        compact += symbol
    if empty_run > 0:
        compact += str(empty_run)
    return compact


def split_and_predict_fen_with_overlay(image_path, output_path='chess_images/overlay_result.jpg',
                                       fen_path='generated_bw_fen.fen'):
    """Classify every square of a board image and write a FEN-like summary.

    The image is resized to 480x480 and cut into an 8x8 grid of 60x60 squares.
    Each square is classified with the module-level ``model``; occupied squares
    become 'w' or 'b' and runs of empty squares are written as their count.
    The rows are joined with '/', printed, and written to ``fen_path``. A copy
    of the colour image with the 'w'/'b' letters drawn on it is written to
    ``output_path``.

    Args:
        image_path: Path of the board image to analyse.
        output_path: Where the annotated overlay image is written.
        fen_path: Where the generated string is written.

    Raises:
        ValueError: If the image cannot be read, or if the label encoder
            contains a class other than "black", "none" or "white".
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    color_image = cv2.imread(image_path)  # For overlay
    if image is None or color_image is None:
        # cv2.imread returns None on failure instead of raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (480, 480))
    color_image = cv2.resize(color_image, (480, 480))

    step = 60  # 480 px / 8 squares

    # Build index → symbol from the encoder fitted during training instead of
    # hard-coding the index order, so it cannot drift from the training labels.
    symbol_for_label = {"black": "b", "none": "1", "white": "w"}
    try:
        label_mapping_inverse = {
            idx: symbol_for_label[str(name)]
            for idx, name in enumerate(encoder.classes_)
        }
    except KeyError as exc:
        raise ValueError(f"Unexpected class in label encoder: {exc}") from exc

    predictions = []

    for row in range(8):
        row_data = []
        for col in range(8):
            square = image[row * step:(row + 1) * step, col * step:(col + 1) * step]
            features_df = extract_hog_features(square)

            # Predict class index using LightGBM (argmax over class probabilities)
            predicted_class_idx = np.argmax(model.predict(features_df), axis=1)[0]

            # Convert class index to its one-character symbol
            label = label_mapping_inverse[predicted_class_idx]

            row_data.append(label)

            # Add overlay text: white letters for 'w', black letters for 'b'
            if label in ['w', 'b']:
                text_color = (255, 255, 255) if label == 'w' else (0, 0, 0)
                cv2.putText(color_image, label, (col * step + 20, row * step + 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2, cv2.LINE_AA)

        # Convert row to compact FEN format (combine consecutive empty squares)
        predictions.append(_compact_fen_row(row_data))

    # Join all rows into final FEN string
    fen_result = '/'.join(predictions)
    print("Generated FEN:", fen_result)

    # Save FEN to file
    with open(fen_path, "w") as file:
        file.write(fen_result)

    # Save overlay image
    cv2.imwrite(output_path, color_image)

# Run the full pipeline on a sample board photo; the overlay image and the
# FEN file are written to the function's default locations.
image_path = 'chess_images/test2.jpg'
split_and_predict_fen_with_overlay(image_path=image_path)


Generated FEN: 2wwbwwb/wwb1bbw1/bwwbww1b/2w5/1b1b1b2/b2ww3/wb1bwb2/6b1
