In [41]:
from sklearn.svm import SVC
from sklearn.datasets import make_classification
import numpy as np
import os
import glob
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold

In [6]:
print(os.getcwd())
current = os.getcwd()
data_folder = current + "\\normalized_images"
chess_types_folders = glob.glob(data_folder + "\\*")
# print(chess_types_folders)
pieces_info = []
labels = {"King": 1, "Knight":2, "Bishop":3, "Rook":4, "Pawn":5, "Queen":6}
for chess_types in chess_types_folders:
    pieces = glob.glob(f'{chess_types}/*')
    # print(pieces)
    type = chess_types.split("\\")[-1]
    for piece in pieces:
        p = {"normalized_img": np.load(piece).reshape(-1), "label": labels[type]}
        pieces_info.append(p)
chess_df = pd.DataFrame(pieces_info)

C:\Users\giang\Downloads\chess\5525Chess


In [7]:
chess_df.head()

Unnamed: 0,normalized_img,label
0,"[0.3803921568627451, 0.3803921568627451, 0.376...",3
1,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",3
2,"[0.1568627450980392, 0.1568627450980392, 0.156...",3
3,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",3
4,"[0.27058823529411763, 0.27450980392156865, 0.2...",3


In [8]:
chess_df.iloc[0:5]['normalized_img'].shape #dim of 1 single image

(5,)

In [9]:
chess_df.iloc[0]['normalized_img']

array([0.38039216, 0.38039216, 0.37647059, ..., 0.7372549 , 0.71764706,
       0.70588235])

In [27]:
#Load dataset
X, y = np.array(chess_df['normalized_img']), np.array(chess_df['label'])
X = np.array([x for x in X])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [43]:
#Selecting best C for Linear SVM Model
C_values = [0.01, 0.1, 1.0, 10, 100]
cv_val_avg_score = []
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

for C in C_values:
	train_errors = []
	val_errors = []
	for train_idx, val_idx in skf.split(X_train, y_train):
		X_train_cv, X_val = X_train[train_idx], X_train[val_idx]
		y_train_cv, y_val = y_train[train_idx], y_train[val_idx]
		lin_svm = SVC(kernel='linear',C=C)
		lin_svm.fit(X_train, y_train)
		# validation error
		y_val_pred = lin_svm.predict(X_val)
		val_error = 1 - accuracy_score(y_val, y_val_pred)
		val_errors.append(val_error)

		# train error
		y_train_pred = lin_svm.predict(X_train_cv)
		train_error = 1 - accuracy_score(y_train_cv, y_train_pred)
		train_errors.append(train_error)

	avg_val_error = np.mean(val_errors)
	avg_train_error = np.mean(train_errors)

	# Report validation, test for each C
	print(f"C = {C}: (avg train error, avg validation error) = ({avg_train_error:.4f}, {avg_val_error:.4f})")

	cv_val_avg_score.append(avg_val_error)





C = 0.01: (avg train error, avg validation error) = (0.0000, 0.0000)
C = 0.1: (avg train error, avg validation error) = (0.0000, 0.0000)
C = 1.0: (avg train error, avg validation error) = (0.0000, 0.0000)
C = 10: (avg train error, avg validation error) = (0.0000, 0.0000)
C = 100: (avg train error, avg validation error) = (0.0000, 0.0000)
Best C = 0.01, with lowest avg test error: 0.0000
Final Test Error: 0.7821


In [46]:
#Fit linear model on best C
lowest_val_error_index = np.argmin(cv_val_avg_score)
C_chosen = C_values[lowest_val_error_index]

print(f"Best C = {C_chosen}, with lowest avg test error: {cv_val_avg_score[lowest_val_error_index]:.4f}")


final_model = SVC(kernel='linear',C=C_chosen)
final_model.fit(X_train, y_train)

y_test_pred = final_model.predict(X_test)
final_test_error = 1 - accuracy_score(y_test, y_test_pred)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')  # Use 'macro' or 'weighted' for multi-class
recall = recall_score(y_test, y_test_pred, average='weighted')
f1 = f1_score(y_test, y_test_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_test_pred)

print(f"Final Test Error: {final_test_error:.4f}")
print(f"Linear SVM Performance Metrics with C={C_chosen}:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Best C = 0.01, with lowest avg test error: 0.0000
Final Test Error: 0.7821
Linear SVM Performance Metrics with C=0.01:
Accuracy: 0.2179
Precision: 0.2174
Recall: 0.2179
F1 Score: 0.2161

Confusion Matrix:
[[ 9  7  3  3  6  4]
 [ 9 12  7  9  8  2]
 [ 5  5 10  8 10  4]
 [ 9  8  7 11 11  6]
 [ 5  7  7 10 11  9]
 [10  9  7  3  3  3]]

Classification Report:
              precision    recall  f1-score   support

           1       0.20      0.28      0.23        32
           2       0.24      0.26      0.25        47
           3       0.25      0.26      0.26        42
           4       0.24      0.19      0.21        52
           5       0.23      0.22      0.23        49
           6       0.11      0.09      0.10        35

    accuracy                           0.22       257
   macro avg       0.21      0.22      0.21       257
weighted avg       0.22      0.22      0.22       257



In [48]:
#Selecting best C for RBF SVM Model (gamma=auto)
C_values = [0.01, 0.1, 1.0, 10, 100]
cv_val_avg_score = []
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

for C in C_values:
	train_errors = []
	val_errors = []
	for train_idx, val_idx in skf.split(X_train, y_train):
		X_train_cv, X_val = X_train[train_idx], X_train[val_idx]
		y_train_cv, y_val = y_train[train_idx], y_train[val_idx]
		auto_svm = SVC(kernel='rbf',C=C, gamma='auto')
		auto_svm.fit(X_train, y_train)
		# validation error
		y_val_pred = auto_svm.predict(X_val)
		val_error = 1 - accuracy_score(y_val, y_val_pred)
		val_errors.append(val_error)

		# train error
		y_train_pred = auto_svm.predict(X_train_cv)
		train_error = 1 - accuracy_score(y_train_cv, y_train_pred)
		train_errors.append(train_error)

	avg_val_error = np.mean(val_errors)
	avg_train_error = np.mean(train_errors)

	# Report validation, test for each C
	print(f"C = {C}: (avg train error, avg validation error) = ({avg_train_error:.4f}, {avg_val_error:.4f})")

	cv_val_avg_score.append(avg_val_error)


C = 0.01: (avg train error, avg validation error) = (0.8180, 0.8180)
C = 0.1: (avg train error, avg validation error) = (0.8063, 0.8063)
C = 1.0: (avg train error, avg validation error) = (0.3356, 0.3355)
C = 10: (avg train error, avg validation error) = (0.0334, 0.0334)
C = 100: (avg train error, avg validation error) = (0.0000, 0.0000)


In [51]:
#Fit RBF model on best C (gamma='auto')
lowest_val_error_index = np.argmin(cv_val_avg_score)
C_chosen = C_values[lowest_val_error_index]

print(f"Best C = {C_chosen}, with lowest avg test error: {cv_val_avg_score[lowest_val_error_index]:.4f}")


final_model = SVC(kernel='rbf',C=C_chosen, gamma='auto')
final_model.fit(X_train, y_train)

y_test_pred = final_model.predict(X_test)
final_test_error = 1 - accuracy_score(y_test, y_test_pred)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')  # Use 'macro' or 'weighted' for multi-class
recall = recall_score(y_test, y_test_pred, average='weighted')
f1 = f1_score(y_test, y_test_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_test_pred)

print(f"Final Test Error: {final_test_error:.4f}")
print(f"RBF SVM Performance Metrics with C={C_chosen}, gamma='auto':")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Best C = 100, with lowest avg test error: 0.0000
Final Test Error: 0.7510
RBF SVM Performance Metrics with C=100, gamma='auto':
Accuracy: 0.2490
Precision: 0.2503
Recall: 0.2490
F1 Score: 0.2456

Confusion Matrix:
[[ 8  7  7  1  6  3]
 [ 4 19  9  6  8  1]
 [13  4 10  8  3  4]
 [ 6 13  5 14 11  3]
 [ 7  7 10  5 11  9]
 [10  7  7  4  5  2]]

Classification Report:
              precision    recall  f1-score   support

           1       0.20      0.28      0.23        32
           2       0.24      0.26      0.25        47
           3       0.25      0.26      0.26        42
           4       0.24      0.19      0.21        52
           5       0.23      0.22      0.23        49
           6       0.11      0.09      0.10        35

    accuracy                           0.22       257
   macro avg       0.21      0.22      0.21       257
weighted avg       0.22      0.22      0.22       257



In [52]:
#Selecting best C for RBF SVM Model (gamma=scale)
C_values = [0.01, 0.1, 1.0, 10, 100]
cv_val_avg_score = []
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

for C in C_values:
	train_errors = []
	val_errors = []
	for train_idx, val_idx in skf.split(X_train, y_train):
		X_train_cv, X_val = X_train[train_idx], X_train[val_idx]
		y_train_cv, y_val = y_train[train_idx], y_train[val_idx]
		scale_svm = SVC(kernel='rbf',C=C, gamma='scale')
		scale_svm.fit(X_train, y_train)
		# validation error
		y_val_pred = scale_svm.predict(X_val)
		val_error = 1 - accuracy_score(y_val, y_val_pred)
		val_errors.append(val_error)

		# train error
		y_train_pred = scale_svm.predict(X_train_cv)
		train_error = 1 - accuracy_score(y_train_cv, y_train_pred)
		train_errors.append(train_error)

	avg_val_error = np.mean(val_errors)
	avg_train_error = np.mean(train_errors)

	# Report validation, test for each C
	print(f"C = {C}: (avg train error, avg validation error) = ({avg_train_error:.4f}, {avg_val_error:.4f})")

	cv_val_avg_score.append(avg_val_error)

C = 0.01: (avg train error, avg validation error) = (0.8180, 0.8180)
C = 0.1: (avg train error, avg validation error) = (0.8063, 0.8063)
C = 1.0: (avg train error, avg validation error) = (0.3356, 0.3355)
C = 10: (avg train error, avg validation error) = (0.0334, 0.0334)
C = 100: (avg train error, avg validation error) = (0.0000, 0.0000)


In [53]:
#Fit RBF model on best C (gamma='scale')
lowest_val_error_index = np.argmin(cv_val_avg_score)
C_chosen = C_values[lowest_val_error_index]

print(f"Best C = {C_chosen}, with lowest avg test error: {cv_val_avg_score[lowest_val_error_index]:.4f}")


final_model = SVC(kernel='rbf',C=C_chosen, gamma='scale')
final_model.fit(X_train, y_train)

y_test_pred = final_model.predict(X_test)
final_test_error = 1 - accuracy_score(y_test, y_test_pred)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='weighted')  # Use 'macro' or 'weighted' for multi-class
recall = recall_score(y_test, y_test_pred, average='weighted')
f1 = f1_score(y_test, y_test_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_test_pred)

print(f"Final Test Error: {final_test_error:.4f}")
print(f"RBF SVM Performance Metrics with C={C_chosen}, gamma='scale':")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Best C = 100, with lowest avg test error: 0.0000
Final Test Error: 0.7510
RBF SVM Performance Metrics with C=100, gamma='scale':
Accuracy: 0.2490
Precision: 0.2503
Recall: 0.2490
F1 Score: 0.2456

Confusion Matrix:
[[ 8  7  7  1  6  3]
 [ 4 19  9  6  8  1]
 [13  4 10  8  3  4]
 [ 6 13  5 14 11  3]
 [ 7  7 10  5 11  9]
 [10  7  7  4  5  2]]

Classification Report:
              precision    recall  f1-score   support

           1       0.20      0.28      0.23        32
           2       0.24      0.26      0.25        47
           3       0.25      0.26      0.26        42
           4       0.24      0.19      0.21        52
           5       0.23      0.22      0.23        49
           6       0.11      0.09      0.10        35

    accuracy                           0.22       257
   macro avg       0.21      0.22      0.21       257
weighted avg       0.22      0.22      0.22       257

