In [1]:
import gc
import numpy
import os
import pandas
import tensorflow

from datetime import *
from sklearn.model_selection import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.regularizers import *

In [2]:
# Retrieve a list of available GPUs
gpus = tensorflow.config.list_physical_devices('GPU')
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Set the visible devices and enable memory growth for the GPU
if gpus:
    try:
        tensorflow.config.set_visible_devices(gpus[0], 'GPU')
        tensorflow.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as e:
        print(e)

In [4]:
# Load data
df = pandas.read_csv('animal_chess.csv')
df.head()

Unnamed: 0,board,side,piece,atk,move,river,trap,den,score,winner
0,l-r---E-T-d-----C---p---W-------------w---P---...,-1,R,1,G7G6,0,0,0,0,0
1,l--r--E-T-d-----C---p---W-------------w---P---...,1,r,1,A3A4,0,0,0,0,0
2,l--r--E-T-d-----C---p---W-------------w---P---...,-1,R,1,G6G5,0,0,0,0,0
3,l--r--E-T-d-----C---p---W------------w----P---...,1,w,4,E3E2,0,0,0,0,0
4,l--r--E-T-d-----C---p---W------------w----P---...,-1,R,1,G5G4,0,0,0,0,0


In [5]:
df = df[:40_000_000]

In [6]:
# Calculate the total number of rows
count = len(df)
count

40000000

In [7]:
# Extract the sample at index 32
sample = df.iloc[32]
sample

board     ----r-E-Tl-----WC--d---------p------L--R--P---...
side                                                     -1
piece                                                     L
atk                                                       0
move                                                   F1E1
river                                                     0
trap                                                      1
den                                                       0
score                                                   -90
winner                                                    0
Name: 32, dtype: object

In [8]:
# Encode chess pieces to integer
def encode_piece(piece_char):
    piece_mapping = {'-': 0, 'r': 1, 'c': 2, 'd': 3, 'w': 4, 'p': 5, 't': 6, 'l': 7, 'e': 8, 'R': -1, 'C': -2, 'D': -3, 'W': -4, 'P': -5, 'T': -6, 'L': -7, 'E': -8}
    return piece_mapping.get(piece_char, 0)

encode_piece(sample['piece'])

-7

In [9]:
# Encode chess board to matrix
def encode_board(board_str):
    board_matrix = numpy.zeros((9, 7))
    for i, piece in enumerate(board_str[::-1]):
        row, col = divmod(i, 9)
        board_matrix[col][row] = encode_piece(piece)
    return board_matrix[::-1, ::-1]

encode_board(sample['board'])

array([[ 0.,  7.,  0.,  0., -7.,  0.,  0.],
       [ 0.,  0.,  3.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  5.,  0.,  0.,  4.],
       [ 0.,  0.,  0.,  0., -1.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [-8., -4.,  0.,  0., -5.,  0.,  0.],
       [ 0., -2.,  0.,  0.,  0., -3.,  0.],
       [-6.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [10]:
# Encode all chess boards
df['board_encoded'] = df['board'].apply(encode_board)
board_matrix_flattened = numpy.array(df['board_encoded'].tolist()).reshape(count, -1)
board_matrix_flattened

array([[ 7.,  0.,  0., ...,  0.,  0., -7.],
       [ 7.,  0.,  0., ...,  0.,  0., -7.],
       [ 7.,  0.,  0., ...,  0.,  0., -7.],
       ...,
       [ 7.,  0.,  0., ...,  0.,  0., -7.],
       [ 7.,  0.,  0., ...,  0.,  0., -7.],
       [ 0.,  7.,  0., ...,  0.,  0., -7.]])

In [11]:
# Prepare data for model training
X = numpy.array(df['board_encoded'].tolist()).reshape(35_000_000, -1)
y = df['score'].values
# X = numpy.array(df['board_encoded'].tolist()).reshape(count, 9, 7, 1)
# y = df['score'].values
X.shape, y.shape

((35000000, 72), (40000000,))

In [12]:
# Reset Keras session
def reset_keras():
    tensorflow.keras.backend.clear_session()
    gc.collect()

In [13]:
# def build_model(input_shape):
#     model = Sequential([
#         Input(shape=input_shape),
#         Conv2D(128, (3, 3), padding='same'),
#         BatchNormalization(),
#         Activation('relu'),
#         MaxPooling2D((2, 2)),
#         Conv2D(256, (3, 3), padding='same'),
#         BatchNormalization(),
#         Activation('relu'),
#         MaxPooling2D((2, 2)),
#         Flatten(),
#         Dense(2048, activation='relu', kernel_regularizer=l2(0.01)),
#         Dropout(0.5),
#         Dense(1, activation='linear')
#     ])
#     model.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error', metrics=['mae'])
#     return model

In [14]:
# def build_model(input_shape):
#     model = Sequential([
#         Input(shape=input_shape),
#         Conv2D(128, (3, 3), padding='same'),
#         BatchNormalization(),
#         LeakyReLU(alpha=0.01),
#         MaxPooling2D((2, 2)),
#         Conv2D(256, (3, 3), padding='same'),
#         BatchNormalization(),
#         LeakyReLU(alpha=0.01),
#         MaxPooling2D((2, 2)),
#         Flatten(),
#         Dense(2048, activation='relu', kernel_regularizer=l2(0.01)),
#         Dropout(0.5),
#         Dense(1, activation='linear')
#     ])
#     model.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error', metrics=['mae'])
#     return model

In [15]:
# Build the model architecture
def build_model(input_shape, activation='relu'):
    # Create a sequential model
    model = Sequential([
        # Input layer specifies the shape of the input data
        Input(shape=input_shape),
        # First convolution layer with 128 filters
        Conv2D(128, (3, 3), padding='same'),
        BatchNormalization(),
        Activation(activation),
        MaxPooling2D((2, 2)),
        # Second convolution layer with 256 filters
        Conv2D(256, (3, 3), padding='same'),
        BatchNormalization(),
        Activation(activation),
        MaxPooling2D((2, 2)),
        # Flatten the output from 2D to 1D before passing to the dense layer
        Flatten(),
        Dense(2048, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        # Output layer with linear activation to predict a continuous value
        Dense(1, activation='linear')
    ])
    # Compile the model with Adam optimizer and mean squared error loss
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error', metrics=['mae'])
    # Return the model
    return model

In [16]:
# Set up TensorBoard logging with a timestamped directory to monitor the training process
log_dir = os.path.join("logs", "fit", datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [17]:
# Initialize callbacks for adaptive learning rate, early stopping to prevent overfitting, and saving the best model
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint("best_model.h5", monitor='val_loss', save_best_only=True, save_format='tf')

In [18]:
# Create the neural network model with the specified input shape and activation function
model = build_model((9, 7, 1), activation='leaky_relu')
# model = build_model((9, 7, 1))

In [19]:
# Initialize an empty list for storing results and a KFold object for 5-fold cross-validation
results = []
kf = KFold(n_splits=5)

# Cross-validation to evaluate model
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    reset_keras()
    history = model.fit(X_train.reshape(-1, 9, 7, 1), y_train, epochs=50, batch_size=256, validation_data=(X_test.reshape(-1, 9, 7, 1), y_test), callbacks=[reduce_lr, early_stopping, checkpoint, tensorboard_callback])
    reset_keras()
    results.append(model.evaluate(X_test.reshape(-1, 9, 7, 1), y_test))

# for train_index, test_index in kf.split(X):
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]
#     reset_keras()
#     history = model.fit(X_train, y_train, epochs=50, batch_size=256, validation_data=(X_test, y_test), callbacks=[reduce_lr, early_stopping, checkpoint, tensorboard_callback])
#     reset_keras()
#     results.append(model.evaluate(X_test, y_test))

# Output the results of cross-validation
print("Fold results:", results)
print("Average result:", numpy.mean(results, axis=0))

ValueError: Data cardinality is ambiguous:
  x sizes: 32000000
  y sizes: 28000000
Make sure all arrays contain the same number of samples.