In [None]:
!pip install pandas
!pip install scikeras[tensorflow]
!pip install imbalanced-learn

In [None]:
# Mark the dataset's download script as executable and run it through the shell.
# NOTE(review): each `!` line is executed in its own shell process, so variables
# exported or directories changed by the script will not persist into later
# cells -- confirm the script only needs to write files.
# NOTE(review): presumably /kaggle/input is a read-only mount, in which case the
# chmod cannot take effect -- verify on the target environment.
!chmod +x /kaggle/input/regression/download.sh
!source /kaggle/input/regression/download.sh

In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as  plt
import gc
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler

In [None]:
def alter_data(X):
    """Sign-flip boards where column 64 is -1 and return them as 8x8 grids.

    Parameters
    ----------
    X : pandas.DataFrame or array-like, shape (n_samples, >= 65)
        Columns 0..63 hold the square values; column 64 holds the turn flag.
        The input is never modified.

    Returns
    -------
    numpy.ndarray, shape (n_samples, 8, 8)
        The first 64 columns of every row, with the sign of all squares
        flipped on rows whose turn flag equals -1 (black to play).

    Raises
    ------
    IndexError
        If ``X`` has fewer than 65 columns (there is no turn column to read).
    """
    # Work on a private copy: ``X.values`` may alias the DataFrame's own buffer,
    # so an in-place ``*=`` would silently alter the caller's data (and flip the
    # boards back if the cell is re-run) or fail on a read-only view.
    data = np.array(X, copy=True)

    # Rows whose turn flag (column 64) is -1 get their 64 squares negated.
    black_to_play = data[:, 64] == -1
    data[black_to_play, :64] *= -1

    # Drop the turn column and fold each row into an 8x8 board.
    return data[:, :64].reshape((data.shape[0], 8, 8))

In [None]:
def load_n_datasets(n, foldername, random_state=42):
    """Load up to ``n`` CSV files from ``foldername`` and balance them by target.

    Parameters
    ----------
    n : int
        Maximum number of CSV files to read.
    foldername : str
        Directory that is both listed and read from.
    random_state : int or None, default 42
        Seed handed to ``RandomUnderSampler`` so the balanced sample is
        repeatable; pass ``None`` for a different draw on every call.

    Returns
    -------
    X_resampled, y_resampled
        The under-sampled feature columns (every column except the last one,
        left untransformed so the caller can still read the turn column) and
        the matching integer ``"Evaluation"`` targets as a one-column frame.

    Raises
    ------
    ValueError
        If ``foldername`` contains no ``.csv`` file.
    """
    # Only CSV files count towards ``n``; sorting makes the selection
    # independent of the arbitrary order returned by ``os.listdir``.
    csv_names = sorted(
        name for name in os.listdir(foldername) if name.endswith('.csv')
    )[:n]
    if not csv_names:
        raise ValueError(f"No .csv files found in {foldername!r}")

    # Read from the folder that was listed, not from a hard-coded directory.
    frames = [pd.read_csv(os.path.join(foldername, name)) for name in csv_names]
    combined_dataset = pd.concat(frames, ignore_index=True)

    X = combined_dataset.iloc[:, 0:-1]
    # Targets are cast to int so the under-sampler can treat them as classes.
    y = combined_dataset["Evaluation"].astype(int).to_frame()

    # Board transformation is left to the caller; resampling only needs a
    # 2-D feature table.
    undersampler = RandomUnderSampler(random_state=random_state)
    X_resampled, y_resampled = undersampler.fit_resample(X, y)
    return X_resampled, y_resampled

In [None]:
def bit_map(X):
    """Expand integer-coded boards into 12 per-piece planes.

    ``X`` is an array of shape (n_boards, 8, 8). Planes 0..5 are 1.0 where the
    board holds codes 1..6 (white pieces); planes 6..11 are -1.0 where the
    board holds codes -1..-6 (black pieces). Every other entry is 0.0.
    Returns an array of shape (n_boards, 8, 8, 12).
    """
    n_boards = X.shape[0]
    planes = np.zeros((n_boards, 8, 8, 12))
    for code in range(1, 7):
        # White piece `code` -> plane code-1, marked with +1.
        planes[..., code - 1] = (X == code).astype(np.float32)
        # Black piece `-code` -> plane code+5, marked with -1.
        planes[..., code + 5] = (-1 * (X == -code)).astype(np.float32)
    return planes

In [None]:
n_datasets = 10
X_chess_data, y_chess_data = load_n_datasets(n_datasets, 'processed_data')

# Targets are kept as a DataFrame for the scaling cell further down.
# (The former `X = pd.DataFrame(X_chess_data)` was dropped: X is rebuilt from
# X_chess_data a few lines below, so that assignment was never read.)
y = pd.DataFrame(y_chess_data)

print(type(X_chess_data))
print(X_chess_data.shape)

# Flip boards whose turn flag is -1 and shape them as 8x8 grids, then expand
# each grid into 12 per-piece planes.
X = alter_data(X_chess_data)
X = bit_map(X)

# Features and targets must stay row-aligned for the train/validation split.
assert len(X) == len(y), "feature/target row counts differ"

In [None]:
# Min-max scale every target column into [0, 1]: subtract the column minimum,
# then divide by the column range.
y = y.sub(y.min()).div(y.max() - y.min())

In [None]:
# Drop the references to the raw tables, then ask the collector to reclaim them.
X_chess_data = y_chess_data = None
gc.collect()

In [None]:
# Hold out 20% of the samples, then cut that hold-out in half:
# 80% training, 10% validation and 10% testing.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_val, y_val, test_size=0.5, random_state=42
)

# Shape of a single sample (leading sample axis removed); the model builders
# read this global when declaring their first layer.
input_shape = X_train.shape[1:]

In [None]:
# The splits are all that is needed from here on; release the full arrays.
X = y = None
gc.collect()

In [None]:
import tensorflow
from keras.models import Sequential 
from keras.layers import Conv2D, Flatten, BatchNormalization, Dense, Dropout
from keras.activations import elu
from keras.optimizers import SGD,Adam

# Fix TensorFlow's global random seed.
# NOTE(review): only TensorFlow is seeded in this cell; NumPy and Python's
# `random` module are not.
tensorflow.random.set_seed(101)

In [None]:
def create_model_mlp(hp):
    """Build and compile the dense regression model for one tuner trial.

    Parameters
    ----------
    hp : HyperParameters
        KerasTuner hyperparameter container. Registers ``n_neurons``,
        ``n_neurons2``, ``n_neurons3`` (layer widths), ``dropout`` and
        ``learning_rate``.

    Returns
    -------
    A compiled ``Sequential`` model (MSE loss, MAE metric, Adam optimizer)
    whose first layer is sized from the notebook-level ``input_shape``.
    """
    # Each hyperparameter needs its own name: KerasTuner keys the search space
    # by name, so registering 'n_neurons' three times would hand the first
    # layer's value to all three layers and ignore the other two ranges.
    n_neurons = hp.Int('n_neurons', min_value=2048, max_value=3072, step=1024)
    n_neurons2 = hp.Int('n_neurons2', min_value=1024, max_value=2048, step=1024)
    n_neurons3 = hp.Int('n_neurons3', min_value=512, max_value=1024, step=512)
    dropout = hp.Choice('dropout', values=[0.2,0.4,0.6])
    learning_rate = hp.Choice('learning_rate', values=[0.0005,0.001,0.005,0.01])

    mlp_model = Sequential()

    # Add hidden layers with ReLU activation and dropout
    mlp_model.add(Dense(n_neurons, activation='relu', input_shape=input_shape))
    mlp_model.add(Dropout(dropout))
    mlp_model.add(Dense(n_neurons2, activation='relu'))
    mlp_model.add(Dropout(dropout))
    mlp_model.add(Dense(n_neurons3, activation='relu'))
    mlp_model.add(Dropout(dropout))

    # Collapse any remaining axes so the output layer yields one value per sample.
    # NOTE(review): with a multi-dimensional input_shape the Dense layers above
    # act on the last axis only; if a fully connected model over the whole
    # input is intended, consider flattening before the first Dense layer.
    mlp_model.add(Flatten())

    # Single sigmoid unit: predictions are bounded to (0, 1).
    mlp_model.add(Dense(1, activation='sigmoid'))

    # Compile model with Adam optimizer
    adam = Adam(learning_rate=learning_rate, beta_1=0.90, beta_2=0.99, epsilon=1e-8)
    mlp_model.compile(optimizer=adam, loss='mean_squared_error', metrics=['mae'])

    # Print model summary
    mlp_model.summary()
    return mlp_model

In [None]:
def create_model_cnn(hp):
    """Build and compile the convolutional regression model for one tuner trial.

    Parameters
    ----------
    hp : HyperParameters
        KerasTuner hyperparameter container. Registers ``n_neurons`` (width of
        both dense layers), ``dropout`` and ``activation`` (dense layers only;
        the convolutions always use ELU).

    Returns
    -------
    A compiled ``Sequential`` model (MSE loss, MAE metric, SGD optimizer)
    whose first layer is sized from the notebook-level ``input_shape``.
    """
    cnn_model = Sequential()
    n_neurons = hp.Int('n_neurons', min_value=250, max_value=750, step=250)
    # All choices must share one type: KerasTuner rejects a Choice that mixes
    # int and float, so "no dropout" is written as 0.0 rather than 0.
    dropout = hp.Choice('dropout', values=[0.0,0.2,0.4,0.6])
    activation = hp.Choice('activation', values=['elu', 'leaky_relu'])

    # Add convolutional layers with ELU activation and batch normalization
    cnn_model.add(Conv2D(20, (5, 5), activation=elu, input_shape=input_shape, padding='same'))
    cnn_model.add(BatchNormalization())
    cnn_model.add(Dropout(dropout))

    cnn_model.add(Conv2D(50, (3, 3), activation=elu, padding='same'))
    cnn_model.add(BatchNormalization())
    cnn_model.add(Dropout(dropout))

    # Flatten the feature maps before the fully connected layers
    cnn_model.add(Flatten())

    cnn_model.add(Dense(n_neurons, activation=activation))
    cnn_model.add(BatchNormalization())
    cnn_model.add(Dropout(dropout))
    cnn_model.add(Dense(n_neurons, activation=activation))
    cnn_model.add(BatchNormalization())
    cnn_model.add(Dropout(dropout))

    # Single sigmoid unit: predictions are bounded to (0, 1).
    cnn_model.add(Dense(1, activation='sigmoid'))

    # Compile model with SGD optimizer
    sgd = SGD(learning_rate=0.01, nesterov=False)
    cnn_model.compile(optimizer=sgd, loss='mean_squared_error', metrics=['mae'])

    # Print model summary
    cnn_model.summary()
    return cnn_model

In [None]:
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters

def run_random_search(X_train, y_train, X_val, y_val, build_fn=None,
                      epochs=5, batch_size=128, max_trials=10,
                      project_name=None):
    """Run a KerasTuner random search and report the best hyperparameters.

    Parameters
    ----------
    X_train, y_train : training features and targets.
    X_val, y_val : validation features and targets; ``val_mae`` on these is
        the tuning objective.
    build_fn : callable, optional
        Model builder taking an ``hp`` argument. Defaults to
        ``create_model_mlp``.
    epochs : int, default 5
        Epochs trained per trial.
    batch_size : int, default 128
        Batch size used during the search.
    max_trials : int, default 10
        Number of hyperparameter combinations to try.
    project_name : str, optional
        Sub-directory of ``grid_search`` holding the trial results. Defaults
        to the builder's function name so searches over different builders
        never share (and silently resume) each other's saved trials.

    Returns
    -------
    The best ``HyperParameters`` object found; it is also printed.
    """
    # Resolved at call time so the function can be defined before the builder.
    if build_fn is None:
        build_fn = create_model_mlp
    if project_name is None:
        project_name = build_fn.__name__

    tuner = RandomSearch(
        build_fn,
        objective='val_mae',
        max_trials=max_trials,  # Number of hyperparameter combinations to try
        executions_per_trial=1,  # Number of models to build and train for each trial
        directory='grid_search',  # Directory to save the results
        project_name=project_name)  # Sub-directory for this builder's trials

    # Start the hyperparameter search
    tuner.search(X_train, y_train, epochs=epochs,
                 validation_data=(X_val, y_val), batch_size=batch_size)

    # Get the best hyperparameters
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

    print("The best hyperparameters are:")
    for hparam in best_hps.space:
        name = hparam.name
        print(f"{name}: {best_hps.get(name)}")

    # Hand the result back so later cells can rebuild the winning model.
    return best_hps

In [None]:

# NOTE(review): these two settings are not passed to run_random_search below,
# which uses its own epoch count and batch size when calling tuner.search();
# they only take effect if some later cell reads them.
batch_size = 128
epochs = 10

# Launch the hyperparameter search on the training / validation split.
run_random_search(X_train,y_train,X_val, y_val)