<h1>CNN Approach

Imports and Training Data

In [1]:
import pandas as pd
df = pd.read_csv('/kaggle/input/the-streets-chess-showdown-white-vs-black/train.csv')
df.head()

Unnamed: 0,id,evaluation
0,23207098,white
1,33225864,black
2,24017235,equal
3,19716795,black
4,10621512,black


Converting Images to FEN Encodings

In [2]:
#imports
from skimage.metrics import structural_similarity as ssim
import cv2
import os

In [3]:
def compute_ssim(imageA, imageB):
    imageA = cv2.cvtColor(imageA, cv2.COLOR_BGR2GRAY)
    imageB = cv2.cvtColor(imageB, cv2.COLOR_BGR2GRAY)
    return ssim(imageA, imageB)

In [4]:
fen_encoding = []
labels = []

In [5]:
# for ind in df.index:
#     img1 = cv2.imread('/kaggle/input/the-streets-chess-showdown-white-vs-black/train/'+str(df.loc[ind,'id'])+'.png')
#     board = ""

#     ##ITERATING OVER THE IMAGE
#     for i in range(0,360,45):
#         fen = ""
#         count = 0
#         for j in range(0,360,45):

#             ##Cropping the image (45x45)
#             img = img1[i:i+44,j:j+44]
#             img = cv2.resize(img,(45,45))
#             clas = None
#             score = 0

#             #compares similarity score with all other pieces available
#             for k in os.listdir('/kaggle/input/pieces2/chess pieces/'):
#                 imgcheck = cv2.imread('/kaggle/input/pieces2/chess pieces/'+k)
#                 imgcheck = cv2.resize(imgcheck,(45,45))
#                 scoreNow = compute_ssim(img,imgcheck)
#                 if scoreNow>score:
#                     score = scoreNow
#                     clas = k[0]

#             ##ADDING THE DETECTED PIECE
#             if clas != '1' and count==0:
#                 fen+=clas

#             elif clas != '1' and count!=0:
#                 fen+=str(count)
#                 fen+=clas
#                 count = 0
#             else:
#                  count+=1
#         if count!=0:
#             fen+=str(count)
#         fen+='/'

#         board+=fen
#     fen_encoding.append(board)
#     labels.append(df.loc[ind,'evaluation'])

In [6]:
#import  pickle
# with open('/kaggle/working/fen_encodings_labels.pkl', 'wb') as f: 
#     pickle.dump((fen_encoding, labels), f



FEN to Grayscale Images Arrays

In [7]:
import cv2
import numpy as np

piece_to_gray = {'K': 1.0, 'Q': 0.8, 'R': 0.6, 'B': 0.5, 'N': 0.4, 'P': 0.2, 'k': -1.0, 'q': -0.8, 'r': -0.6, 'b': -0.5, 'n': -0.4, 'p': -0.2, '.': 0.0}

In [8]:
images = list()

In [9]:
import pickle
with open("/kaggle/input/encodi/train_set_encodings.pkl","rb") as f:
    fen_encoding, labels = pickle.load(f)


#Converting to numerical
for ind,label in enumerate(labels):
    if label=='white':
        labels[ind] = 2
    elif label=='equal':
        labels[ind] = 1
    else:
        labels[ind] = 0

In [10]:
def fen2image(fen):
    # A blank image with only black background
    board = np.zeros((8, 8), dtype=np.float32)

    # Spliting the FEN string into rows
    rows = fen.split('/')
    
    for i, row in enumerate(rows):
        col = 0
        for char in row:
            if char.isdigit():
                col += int(char) 
            else:
                gray_value = piece_to_gray.get(char, 0.0)
                #So I forgot to multiply by 255 by mistake and the thing is I observed not multiplying by 255 is giving much more better output
                #I tweaked the input weights and accuracy improves
                #As an image this thing makes 0 sense but somehow these features are getting well captured in the CNN 
                board[i, col] = gray_value  
                col += 1

    return board

In [11]:


#Converting FEN to Images
for fen in fen_encoding:
    images.append(fen2image(fen))

CNN 

In [12]:
#Here's a reallly complex CNN Architecture

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization

model = Sequential([
    Conv2D(32, (1, 1), activation='relu', input_shape=(8, 8, 1)),
    MaxPooling2D(pool_size=(1, 1)),

    Conv2D(64, (1, 1), activation='relu'),
    MaxPooling2D(pool_size=(1, 1)),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

X = np.array(images)
y = np.array(labels)
#prepares one hot encoding as in the (Size of list x 3) matrix with 1 wherever it is true
y = to_categorical(y, num_classes=3) 

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=92)

In [14]:
epochs = 20
batch_size = 32

history = model.fit(
    X_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

Epoch 1/20
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 0.4865 - loss: 1.0223 - val_accuracy: 0.5811 - val_loss: 0.9130
Epoch 2/20
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.5723 - loss: 0.9183 - val_accuracy: 0.5445 - val_loss: 0.9418
Epoch 3/20
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.6006 - loss: 0.8708 - val_accuracy: 0.5827 - val_loss: 0.8912
Epoch 4/20
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.6125 - loss: 0.8504 - val_accuracy: 0.5997 - val_loss: 0.8735
Epoch 5/20
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.6213 - loss: 0.8316 - val_accuracy: 0.6200 - val_loss: 0.8390
Epoch 6/20
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.6278 - loss: 0.8119 - val_accuracy: 0.6188 - val_loss: 0.8379
Epoch 7/20
[1m537/537

Testing it on validation dataset

In [15]:
validation_loss, validation_accuracy = model.evaluate(X_val, y_val)
print(f'Validation Accuracy: {validation_accuracy * 100:.2f}%')

[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5696 - loss: 1.0715
Validation Accuracy: 57.39%


Processing The Testing Data

In [16]:
df_test = pd.read_csv('/kaggle/input/the-streets-chess-showdown-white-vs-black/sample_submission.csv')
df_test.head()

Unnamed: 0,image_id,evaluation
0,26065374,
1,32481096,
2,36997677,
3,34417938,
4,28905181,


In [17]:
test_encodings = []

In [18]:
# for ind in df_test.index:
        
#     img1 = cv2.imread('/kaggle/input/the-streets-chess-showdown-white-vs-black/test/'+str(df_test.loc[ind,'image_id'])+'.png')
#     board = ""

#     ##ITERATING OVER THE IMAGE
#     for i in range(0,360,45):
#         fen = ""
#         count = 0
#         for j in range(0,360,45):

#             ##Cropping the image (45x45)
#             img = img1[i:i+44,j:j+44]
#             img = cv2.resize(img,(45,45))
#             clas = None
#             score = 0

#             #compares similarity score with all other pieces available
#             for k in os.listdir('/kaggle/input/pieces'):
#                 imgcheck = cv2.imread('/kaggle/input/pieces/'+k)
#                 imgcheck = cv2.resize(imgcheck,(45,45))
#                 scoreNow = compute_ssim(img,imgcheck)
#                 if scoreNow>score:
#                     score = scoreNow
#                     clas = k[0]

#             ##ADDING THE DETECTED PIECE
#             if clas != '1' and count==0:
#                 fen+=clas

#             elif clas != '1' and count!=0:
#                 fen+=str(count)
#                 fen+=clas
#                 count = 0
#             else:
#                  count+=1
#         if count!=0:
#             fen+=str(count)
#         fen+='/'

#         board+=fen
#     test_encodings.append(board)


In [19]:
# import pickle
# with open('/kaggle/working/test_encodinga_6780_to_end.pkl', 'wb') as f: 
#     pickle.dump(test_encodings, f)

Converting FEN into Grayscale Image Map

In [20]:
image_id = df_test['image_id'].to_list()

with open("/kaggle/input/encodi/test_encodings.pkl","rb") as f:
    image_encoding = pickle.load(f)

In [21]:
image_array = [fen2image(image) for image in image_encoding]

Prediction Code

In [22]:
# image_array = np.array(image_array)
# if image_array.shape != (8,8,1):
#     image_array = np.expand_dims(image_array, axis=-1)
# for image in image_array:
#     prediction = model.predict(image)
image_array = np.array(image_array)
processed_images = []
for image in image_array:
    if image.shape != (8, 8, 1):
        image = np.expand_dims(image, axis=-1)  
    processed_images.append(image)
processed_images = np.stack(processed_images, axis=0)
prediction = model.predict(processed_images)

[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [23]:
prediction.shape

(8540, 3)

In [24]:
image_array.shape

(8540, 8, 8)

In [25]:
prediction = np.argmax(prediction, axis=1)
class_labels = {0: 'black', 1: 'equal', 2: 'white'}
predicted_labels = [class_labels[pred] for pred in prediction]

In [26]:
df_ans = pd.DataFrame(columns=['image_id', 'evaluation'])

In [27]:
for i in range(len(image_id)):
    df_ans.loc[i,'image_id'] = image_id[i]
    df_ans.loc[i,'evaluation'] = predicted_labels[i]

In [28]:
print(df_ans.nunique())

duplicates = df_ans[df_ans['image_id'].duplicated()]

print("Duplicate IDs:")
print(duplicates)
df_ans = df_ans.drop_duplicates(subset=['image_id'], keep='first')

print("Unique values after removing duplicates:")
print(df_ans.nunique())


image_id      8539
evaluation       3
dtype: int64
Duplicate IDs:
      image_id evaluation
2495  18078547      black
Unique values after removing duplicates:
image_id      8539
evaluation       3
dtype: int64


In [29]:
df_ans.rename(columns={'image_id': 'id'}, inplace=True)


In [30]:
df_ans.head()

Unnamed: 0,id,evaluation
0,26065374,equal
1,32481096,equal
2,36997677,white
3,34417938,black
4,28905181,black


In [31]:
df_ans.to_csv('/kaggle/working/submission_cnn.csv',index=False)

<h1>MATERIAL STRENGTH BASED APPROACH

In [32]:
import pandas as  pd
values = {'white':0,'black':0,'equal':0}
df = pd.read_csv('/kaggle/input/mqwerty1/train (2).csv')

for i in df.index:
    values[df.loc[i,'evaluation']]+=1
    
values

{'white': 7300, 'black': 7160, 'equal': 7000}

In [33]:
import pickle
with open('/kaggle/input/encodi/train_set_encodings.pkl','rb') as f:
    fen_encodings, labels = pickle.load(f)


In [34]:
# import pandas as pd
# import numpy as np

# piece_columns = ['white_pawn', 'white_knight', 'white_bishop', 'white_rook', 'white_queen', 'white_king',
#                  'black_pawn', 'black_knight', 'black_bishop', 'black_rook', 'black_queen', 'black_king']

# df_chess = pd.DataFrame(columns=piece_columns + ['label'])


# piece_map = {
#     'P': 'white_pawn', 'N': 'white_knight', 'B': 'white_bishop', 'R': 'white_rook', 'Q': 'white_queen', 'K': 'white_king',
#     'p': 'black_pawn', 'n': 'black_knight', 'b': 'black_bishop', 'r': 'black_rook', 'q': 'black_queen', 'k': 'black_king'
# }
# for index, fen in enumerate(fen_encodings):
    
#     piece_count = {piece: 0 for piece in piece_columns}    
#     rows = fen.split('/')
    
#     for row in rows:
#         for char in row:
#             if char.isdigit():
#                 continue  
#             if char in piece_map:
#                 piece_count[piece_map[char]] += 1 
    
#     df_row = pd.DataFrame(piece_count, index=[0])
#     df_row['label'] = labels[index]
    
#     df_chess = pd.concat([df_chess, df_row], ignore_index=True)

In [35]:
#df_chess.to_csv('/kaggle/working/material_data_train.csv',index=False)

In [36]:
df_chess_temp = pd.read_csv('/kaggle/input/mqwerty1/material_data_train.csv')

In [37]:
# Check unique entries in the 'label' column
unique_labels = df_chess_temp['label'].unique()
print("Unique entries in 'label' column:", unique_labels)


Unique entries in 'label' column: ['white' 'black' 'equal']


In [38]:
def map_label(label):
    if label == 'white':
        return 2
    elif label == 'black':
        return 1
    elif label == 'equal':
        return 0  
df_chess_temp['label'] = df_chess_temp['label'].apply(map_label)


In [39]:
df_chess_temp.head()

Unnamed: 0,white_pawn,white_knight,white_bishop,white_rook,white_queen,white_king,black_pawn,black_knight,black_bishop,black_rook,black_queen,black_king,label
0,5,0,2,2,1,1,6,1,2,2,0,1,2
1,7,2,2,2,1,1,6,2,2,2,1,1,1
2,0,0,1,1,0,1,2,0,0,1,0,1,0
3,5,0,2,2,1,1,6,1,1,2,1,1,1
4,0,0,0,1,0,1,3,0,0,1,0,1,1


In [40]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd


def one_hot_encode_column(df, col, num_values):
    expanded_cols = np.zeros((df.shape[0], num_values), dtype=int)
    for i, val in enumerate(df[col]):
        if 0 <= val < num_values:
            expanded_cols[i, val] = 1
    expanded_df = pd.DataFrame(expanded_cols, columns=[f"{col}_{i}" for i in range(num_values)])
    return expanded_df

columns_to_expand = {
    'white_pawn': 12, 'white_knight': 12, 'white_bishop': 12, 'white_rook': 12, 
    'white_queen': 12, 'white_king': 2, 'black_pawn': 12, 'black_knight': 12, 
    'black_bishop': 12, 'black_rook': 12, 'black_queen': 12, 'black_king': 2
}

one_hot_encoded_columns = [one_hot_encode_column(df_chess_temp, col, num_values) 
                           for col, num_values in columns_to_expand.items()]
one_hot_encoded_columns=pd.concat(one_hot_encoded_columns, axis=1)
df_chess_temp = pd.concat([pd.DataFrame(one_hot_encoded_columns), df_chess_temp['label']], axis=1)

In [41]:
df_chess_temp.head()

Unnamed: 0,white_pawn_0,white_pawn_1,white_pawn_2,white_pawn_3,white_pawn_4,white_pawn_5,white_pawn_6,white_pawn_7,white_pawn_8,white_pawn_9,...,black_queen_5,black_queen_6,black_queen_7,black_queen_8,black_queen_9,black_queen_10,black_queen_11,black_king_0,black_king_1,label
0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,1
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [42]:
int_columns = df_chess_temp.select_dtypes(include='int64').columns  
df_chess_temp[int_columns] = df_chess_temp[int_columns].astype('category')
df_chess_temp[ 'label' ] = df_chess_temp['label'].astype('int')

In [43]:
df_chess_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21459 entries, 0 to 21458
Columns: 125 entries, white_pawn_0 to label
dtypes: category(124), int64(1)
memory usage: 2.7 MB


In [44]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
piece_columns = df_chess_temp.columns[:-1]

df_chess_temp[piece_columns] = df_chess_temp[piece_columns].astype('category')


X = np.array(df_chess_temp[piece_columns].values)
y = np.array(df_chess_temp['label'].values)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=82)


In [45]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

class ReshapedCatBoostClassifier(CatBoostClassifier):
    def predict(self, X):
        return super().predict(X).reshape(-1)

estimators = [
    ('svm', SVC(C=4.485456534252414, kernel='rbf', probability=True)),
    ('catboost', ReshapedCatBoostClassifier(verbose=0)),
    ('lightgbm', LGBMClassifier())
]

modelog = VotingClassifier(estimators=estimators, voting='hard')
modelog.fit(X_train, y_train)

y_pred = modelog.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004172 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 17167, number of used features: 42
[LightGBM] [Info] Start training from score -1.123084
[LightGBM] [Info] Start training from score -1.093615
[LightGBM] [Info] Start training from score -1.079629
Accuracy: 68.99%


In [46]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

estimators = [
    ('svm', SVC(C=4.485456534252414, kernel='rbf', probability=True)),
    ('catboost', ReshapedCatBoostClassifier(verbose=0)),
    ('lightgbm', LGBMClassifier())
]

model = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    stack_method='predict_proba'  # Uses probabilistic predictions from base models
)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Stacking Accuracy: {accuracy * 100:.2f}%')


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001820 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 17167, number of used features: 42
[LightGBM] [Info] Start training from score -1.123084
[LightGBM] [Info] Start training from score -1.093615
[LightGBM] [Info] Start training from score -1.079629
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 84
[LightGBM] [Info] Number of data points in the train set: 13733, number of used features: 42
[LightGBM] [Info] Start training from score -1.123085
[LightGBM] [Info] Start training from score -1.093528
[LightGBM] [Info] Start trai

In [47]:
df_chess_temp.head()

Unnamed: 0,white_pawn_0,white_pawn_1,white_pawn_2,white_pawn_3,white_pawn_4,white_pawn_5,white_pawn_6,white_pawn_7,white_pawn_8,white_pawn_9,...,black_queen_5,black_queen_6,black_queen_7,black_queen_8,black_queen_9,black_queen_10,black_queen_11,black_king_0,black_king_1,label
0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,1
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1


In [48]:
import pickle
with open('/kaggle/input/encodi/test_encodings.pkl','rb') as f:
    fen_test = pickle.load(f)

In [49]:
piece_columns = [
    'white_pawn', 'white_knight', 'white_bishop', 'white_rook', 'white_queen', 'white_king',
    'black_pawn', 'black_knight', 'black_bishop', 'black_rook', 'black_queen', 'black_king'
]

piece_map = {
    'P': 'white_pawn', 'N': 'white_knight', 'B': 'white_bishop', 'R': 'white_rook', 'Q': 'white_queen', 'K': 'white_king',
    'p': 'black_pawn', 'n': 'black_knight', 'b': 'black_bishop', 'r': 'black_rook', 'q': 'black_queen', 'k': 'black_king'
}
df_chess_test = pd.DataFrame(columns=piece_columns)

for index, fen in enumerate(fen_test):
    piece_count = {piece: 0 for piece in piece_columns}  # Reset count for each FEN
    
    rows = fen.split('/')
    
    for row in rows:
        for char in row:
            if char.isdigit():
                continue  
            if char in piece_map:
                piece_count[piece_map[char]] += 1  
    
    df_row = pd.DataFrame(piece_count, index=[0])  
    df_chess_test = pd.concat([df_chess_test, df_row], ignore_index=True)  

In [50]:
df_chess_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8540 entries, 0 to 8539
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   white_pawn    8540 non-null   object
 1   white_knight  8540 non-null   object
 2   white_bishop  8540 non-null   object
 3   white_rook    8540 non-null   object
 4   white_queen   8540 non-null   object
 5   white_king    8540 non-null   object
 6   black_pawn    8540 non-null   object
 7   black_knight  8540 non-null   object
 8   black_bishop  8540 non-null   object
 9   black_rook    8540 non-null   object
 10  black_queen   8540 non-null   object
 11  black_king    8540 non-null   object
dtypes: object(12)
memory usage: 800.8+ KB


In [51]:
def one_hot_encode_column(df, col, num_values):
    # Create an array to store the one-hot encoded columns
    expanded_cols = np.zeros((df.shape[0], num_values), dtype=int)
    
    # Iterate through each value in the column
    for i, val in enumerate(df[col]):
        if 0 <= val < num_values:
            expanded_cols[i, val] = 1
    
    # Create a DataFrame with the one-hot encoded columns
    expanded_df = pd.DataFrame(expanded_cols, columns=[f"{col}_{i}" for i in range(num_values)])
    return expanded_df

# Define the number of unique values (possible values) for each chess piece
columns_to_expand = {
    'white_pawn': 12, 'white_knight': 12, 'white_bishop': 12, 'white_rook': 12, 
    'white_queen': 12, 'white_king': 2, 'black_pawn': 12, 'black_knight': 12, 
    'black_bishop': 12, 'black_rook': 12, 'black_queen': 12, 'black_king': 2
}

# Apply one-hot encoding for each column defined in columns_to_expand
one_hot_encoded_columns = [one_hot_encode_column(df_chess_test, col, num_values) 
                           for col, num_values in columns_to_expand.items()]

# Concatenate all the one-hot encoded columns into a single DataFrame
df_chess_test = pd.concat(one_hot_encoded_columns, axis=1)

# df_chess_test now contains the one-hot encoded data for all the columns


In [52]:
df_chess_test.head()

Unnamed: 0,white_pawn_0,white_pawn_1,white_pawn_2,white_pawn_3,white_pawn_4,white_pawn_5,white_pawn_6,white_pawn_7,white_pawn_8,white_pawn_9,...,black_queen_4,black_queen_5,black_queen_6,black_queen_7,black_queen_8,black_queen_9,black_queen_10,black_queen_11,black_king_0,black_king_1
0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [53]:
int_columns = df_chess_test.select_dtypes(include='int64').columns  # Select columns with int data type
df_chess_test[int_columns] = df_chess_test[int_columns].astype('category') 

In [54]:
X_real_test = np.array(df_chess_test.values)
y_preds = modelog.predict(X_real_test)

In [55]:
df_test = pd.read_csv('/kaggle/input/the-streets-chess-showdown-white-vs-black/sample_submission.csv')
columns = ['id','evaluation']
df_ans = pd.DataFrame(columns=columns)
id_vals = df_test['image_id'].values

In [56]:
df_ans['id'] = id_vals
df_ans['evaluation'] = y_preds

In [57]:
print(df_ans.nunique())

duplicates = df_ans[df_ans['id'].duplicated()]

print("Duplicate IDs:")
print(duplicates)
df_ans = df_ans.drop_duplicates(subset=['id'], keep='first')

print("Unique values after removing duplicates:")
print(df_ans.nunique())


id            8539
evaluation       3
dtype: int64
Duplicate IDs:
            id  evaluation
2495  18078547           1
Unique values after removing duplicates:
id            8539
evaluation       3
dtype: int64


In [58]:
label_mappings = {2:'white',1:'black',0:'equal'}
df_ans['evaluation'] = df_ans['evaluation'].map(label_mappings)

In [59]:
df_ans.to_csv('/kaggle/working/submission_csv_final.csv',index=False)