In [1]:
import os
from helpers.utils import parse_dataset
import pandas as pd

# Relative path of the evaluations CSV; the same constant is later passed to
# ChessDataset as its csv_file.
DATA_PATH = os.path.join("data", "evals.csv")
# Game count forwarded as the first argument of parse_dataset.
GAMES_TO_LOAD = 32_000
# NOTE(review): parse_dataset is a project helper whose body is not visible
# here; presumably it (re)generates the CSV at DATA_PATH and `starts="w"`
# selects positions by side to move -- confirm against helpers.utils.
parse_dataset(GAMES_TO_LOAD, DATA_PATH, starts="w")

10.0%
20.0%
30.0%
40.0%
40.0%
40.0%
50.0%
60.0%
60.0%
60.0%
70.0%
70.0%
80.0%
90.0%
100.0%
Done in 0.80s


In [2]:
import torch
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.transforms import Compose
from helpers.fen import fen_to_bitboard
from helpers.data import prepare_chess_frame

# Choose the compute backend in order of preference: CUDA, then Apple MPS,
# and finally plain CPU when no accelerator is reported as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")


def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and return the loss of the final batch.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Module to optimise; it is switched to training mode here.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        float: Loss of the last processed batch, always as a plain Python
        float so callers never end up holding a device tensor.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.train()
    last_loss = None
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        # Clear gradients after the step so the next batch starts from zero.
        optimizer.zero_grad()
        last_loss = loss
    if last_loss is None:
        raise ValueError("train() received a dataloader that yields no batches")
    # Convert once at the end: a single host sync instead of one per batch.
    return last_loss.item()


class FenToBits:
    """Sample transform that swaps the ``"fen"`` entry for a ``"board"`` entry.

    The board is whatever ``fen_to_bitboard`` returns for the sample's FEN
    string and the ``merge_colors`` flag given at construction time.
    """

    def __init__(self, merge_colors: bool):
        self.merge_colors = merge_colors

    def __call__(self, sample):
        evaluation = sample["eval"]
        bitboard = fen_to_bitboard(sample["fen"], self.merge_colors)
        return dict(eval=evaluation, board=bitboard)


class ToTensor:
    """Turn the numpy board and scalar evaluation of a sample into float tensors."""

    def __call__(self, sample):
        board_array = sample["board"]
        evaluation = sample["eval"]
        board_tensor = torch.from_numpy(board_array).float()
        # The evaluation is wrapped in a list so the result has one element.
        eval_tensor = torch.tensor([evaluation]).float()
        return {"board": board_tensor, "eval": eval_tensor}


class ToTuple:
    """Unpack a sample dict into a ``(board, eval)`` pair."""

    def __call__(self, sample):
        board = sample["board"]
        evaluation = sample["eval"]
        return board, evaluation


class NeuralNetwork(nn.Module):
    """Convolutional regressor whose architecture is sampled from a trial.

    The network is a stack of unpadded 3x3 convolutions, a flatten, a stack
    of dense layers each followed by dropout, and a final single-output
    linear layer. Layer counts, widths, dropout rates and the activation are
    all requested from ``trial``.

    Args:
        trial: Object exposing ``suggest_categorical``, ``suggest_int`` and
            ``suggest_float`` (an Optuna trial in this notebook).
        input_shape: Shape of one un-batched input. ``input_shape[0]`` is the
            number of input channels. When at least three dimensions are
            given, the last two are used as the spatial size; otherwise an
            8x8 board is assumed.

    Raises:
        ValueError: If the trial returns an activation name that is not one
            of ``"sigmoid"``, ``"relu"`` or ``"tanh"``.
    """

    # Maps the sampled activation name to the module class to instantiate.
    _ACTIVATIONS = {"sigmoid": nn.Sigmoid, "relu": nn.ReLU, "tanh": nn.Tanh}
    # Spatial side assumed when input_shape carries no height/width.
    _DEFAULT_SIDE = 8

    def __init__(self, trial, input_shape):
        super().__init__()

        activation_name = trial.suggest_categorical(
            "activation_func", ["sigmoid", "relu", "tanh"]
        )
        try:
            activation_f = self._ACTIVATIONS[activation_name]
        except KeyError:
            raise ValueError(
                f"Unsupported activation function: {activation_name!r}"
            ) from None

        conv_layers = trial.suggest_int("conv_layers", 1, 3)
        stack = []
        in_size = input_shape[0]
        kernel = 3
        for i in range(conv_layers):
            out_size = trial.suggest_int(f"conv_out_{i+1}", 2, 128)
            conv = nn.Conv2d(in_size, out_size, kernel_size=kernel)
            in_size = out_size
            stack.extend([conv, activation_f()])
        dense_layers = trial.suggest_int("dense_layers", 1, 3)
        stack.append(nn.Flatten())

        # Each unpadded, stride-1 convolution trims (kernel - 1) cells from
        # both spatial dimensions; derive the flattened size from the real
        # input size instead of hard-coding an 8x8 board.
        if len(input_shape) >= 3:
            height, width = int(input_shape[-2]), int(input_shape[-1])
        else:
            height = width = self._DEFAULT_SIDE
        shrink = conv_layers * (kernel - 1)
        in_size *= (height - shrink) * (width - shrink)

        for i in range(dense_layers):
            out_size = trial.suggest_int(f"dense_out_{i+1}", 4, 128)
            dropout = trial.suggest_float(f"dropout_{i+1}", 0, 0.5)
            dense = nn.Linear(in_size, out_size)
            in_size = out_size
            stack.extend([dense, activation_f(), nn.Dropout(dropout)])
        stack.append(nn.Linear(in_size, 1))
        self.stack = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the sampled layer stack to ``x`` and return its output."""
        return self.stack(x)


class ChessDataset(Dataset):
    """Map-style dataset over the rows of a chess evaluation CSV.

    The CSV is read with pandas and passed through
    ``prepare_chess_frame(..., normalize=True)``. Each item is a dict with
    the row's ``"fen"`` and ``"eval"`` values, optionally run through
    ``transform``.
    """

    def __init__(self, csv_file, transform=None):
        raw_frame = pd.read_csv(csv_file)
        self.transform = transform
        self.df = prepare_chess_frame(raw_frame, normalize=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Tensor indices are converted to plain Python values before lookup.
        position = idx.tolist() if torch.is_tensor(idx) else idx
        record = self.df.iloc[position]
        sample = {"fen": record["fen"], "eval": record["eval"]}

        if not self.transform:
            return sample
        return self.transform(sample)

Using cuda device


In [6]:
import optuna
from optuna.trial import TrialState

def _evaluate(dataloader, model, loss_fn):
    """Return the mean per-batch loss of ``model`` over ``dataloader``.

    The model is put in eval mode and gradients are disabled, so no weights
    are updated.
    """
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            total_loss += loss_fn(model(X), y).item()
    return total_loss / len(dataloader)


def objective(trial, epochs=12, batch_size=256, split_seed=0):
    """Optuna objective: train a sampled network and return its final test loss.

    Args:
        trial: Optuna trial used to sample the board encoding, the network
            architecture, the learning rate and the optimizer.
        epochs: Number of training epochs (default 12, as before).
        batch_size: Batch size for both data loaders (default 256, as before).
        split_seed: Seed for the train/test split. A fixed seed means every
            trial is scored, and pruned, on the same held-out rows rather
            than on a different random split each time.

    Returns:
        float: Mean per-batch MSE on the test split after the last epoch.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
        optuna.exceptions.TrialPruned: When the pruner stops the trial early.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")
    print(f"Running trial {trial.number}")

    # The choices stay strings so the parameter remains compatible with
    # trials already recorded in the persisted study.
    merge_colors = trial.suggest_categorical("merge_colors", ["True", "False"])
    chess_dataset = ChessDataset(
        csv_file=DATA_PATH,
        transform=Compose([FenToBits(merge_colors == "True"), ToTensor(), ToTuple()]),
    )
    split_rng = torch.Generator().manual_seed(split_seed)
    train_set, test_set = random_split(
        chess_dataset, [0.8, 0.2], generator=split_rng
    )
    # Reshuffle the training rows every epoch; the test loader keeps its order.
    train_dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_set, batch_size=batch_size)

    # Shape of a single transformed board, used to size the first layer.
    dataset_shape = chess_dataset[0][0].shape
    model = NeuralNetwork(trial=trial, input_shape=dataset_shape).to(device)
    loss_fn = nn.MSELoss()

    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)

    test_loss = None
    for epoch in range(epochs):
        train(train_dataloader, model, loss_fn, optimizer)
        test_loss = _evaluate(test_dataloader, model, loss_fn)

        # Report the intermediate value so the pruner can stop weak trials.
        trial.report(test_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return test_loss


# Run (or resume) the hyper-parameter search and print a short summary.
if __name__ == "__main__":
    # The study is persisted in a local SQLite file; load_if_exists=True
    # makes repeated runs append trials to the study of the same name
    # instead of failing or starting over.
    study = optuna.create_study(
        direction="minimize",
        storage="sqlite:///data/db2.sqlite3",
        study_name="chess-ai-dropout",
        load_if_exists=True,
    )
    # Adds 30 more trials on top of whatever the storage already holds.
    study.optimize(objective, n_trials=30)

    # Select trials by final state for the summary below.
    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    # NOTE(review): len(study.trials) is not filtered by state, so this
    # figure can exceed pruned + complete (the recorded output shows 163
    # versus 144 + 17).
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    # NOTE(review): presumably this raises when the study has no completed
    # trial yet -- confirm against the Optuna documentation.
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[I 2024-01-21 08:43:49,375] Using an existing study with name 'chess-ai-dropout' instead of creating a new one.


Running trial 133


[I 2024-01-21 08:43:58,005] Trial 133 pruned. 


Running trial 134


[I 2024-01-21 08:44:06,554] Trial 134 pruned. 


Running trial 135


[I 2024-01-21 08:44:14,914] Trial 135 pruned. 


Running trial 136


[I 2024-01-21 08:44:23,303] Trial 136 pruned. 


Running trial 137


[I 2024-01-21 08:44:31,657] Trial 137 pruned. 


Running trial 138


[I 2024-01-21 08:44:40,262] Trial 138 pruned. 


Running trial 139


[I 2024-01-21 08:44:48,596] Trial 139 pruned. 


Running trial 140


[I 2024-01-21 08:44:56,916] Trial 140 pruned. 


Running trial 141


[I 2024-01-21 08:45:21,342] Trial 141 pruned. 


Running trial 142


[I 2024-01-21 08:45:30,022] Trial 142 pruned. 


Running trial 143


[I 2024-01-21 08:45:52,612] Trial 143 pruned. 


Running trial 144


[I 2024-01-21 08:46:00,355] Trial 144 pruned. 


Running trial 145


[I 2024-01-21 08:46:08,120] Trial 145 pruned. 


Running trial 146


[I 2024-01-21 08:46:15,905] Trial 146 pruned. 


Running trial 147


[I 2024-01-21 08:47:45,042] Trial 147 finished with value: 0.14128362238407136 and parameters: {'merge_colors': 'False', 'activation_func': 'relu', 'conv_layers': 1, 'conv_out_1': 108, 'dense_layers': 3, 'dense_out_1': 122, 'dropout_1': 0.16845183462902655, 'dense_out_2': 71, 'dropout_2': 0.30326837057087386, 'dense_out_3': 54, 'dropout_3': 0.43426953932282253, 'lr': 0.0007004684993010298, 'optimizer': 'RMSprop'}. Best is trial 147 with value: 0.14128362238407136.


Running trial 148


[I 2024-01-21 08:49:19,835] Trial 148 finished with value: 0.1410828161239624 and parameters: {'merge_colors': 'False', 'activation_func': 'relu', 'conv_layers': 1, 'conv_out_1': 89, 'dense_layers': 2, 'dense_out_1': 124, 'dropout_1': 0.22910426774976667, 'dense_out_2': 59, 'dropout_2': 0.2321420275948466, 'lr': 0.0007271347627868801, 'optimizer': 'Adam'}. Best is trial 148 with value: 0.1410828161239624.


Running trial 149


[I 2024-01-21 08:49:28,440] Trial 149 pruned. 


Running trial 150


[I 2024-01-21 08:50:59,356] Trial 150 finished with value: 0.1554018673300743 and parameters: {'merge_colors': 'False', 'activation_func': 'relu', 'conv_layers': 1, 'conv_out_1': 92, 'dense_layers': 2, 'dense_out_1': 126, 'dropout_1': 0.13714022337190598, 'dense_out_2': 74, 'dropout_2': 0.25752573170156945, 'lr': 0.0007401900227268707, 'optimizer': 'RMSprop'}. Best is trial 148 with value: 0.1410828161239624.


Running trial 151


[I 2024-01-21 08:51:07,106] Trial 151 pruned. 


Running trial 152


[I 2024-01-21 08:51:15,450] Trial 152 pruned. 


Running trial 153


[I 2024-01-21 08:51:26,307] Trial 153 pruned. 


Running trial 154


[I 2024-01-21 08:52:53,684] Trial 154 finished with value: 0.1652970325946808 and parameters: {'merge_colors': 'False', 'activation_func': 'relu', 'conv_layers': 1, 'conv_out_1': 99, 'dense_layers': 2, 'dense_out_1': 120, 'dropout_1': 0.12331827684699766, 'dense_out_2': 67, 'dropout_2': 0.2563146870951069, 'lr': 0.0005495014045633169, 'optimizer': 'RMSprop'}. Best is trial 148 with value: 0.1410828161239624.


Running trial 155


[I 2024-01-21 08:53:01,360] Trial 155 pruned. 


Running trial 156


[I 2024-01-21 08:53:31,139] Trial 156 pruned. 


Running trial 157


[I 2024-01-21 08:53:38,708] Trial 157 pruned. 


Running trial 158


[I 2024-01-21 08:53:46,390] Trial 158 pruned. 


Running trial 159


[I 2024-01-21 08:53:54,084] Trial 159 pruned. 


Running trial 160


[I 2024-01-21 08:54:01,700] Trial 160 pruned. 


Running trial 161


[I 2024-01-21 08:54:09,357] Trial 161 pruned. 


Running trial 162


[I 2024-01-21 08:54:17,666] Trial 162 pruned. 


Study statistics: 
  Number of finished trials:  163
  Number of pruned trials:  144
  Number of complete trials:  17
Best trial:
  Value:  0.1410828161239624
  Params: 
    merge_colors: False
    activation_func: relu
    conv_layers: 1
    conv_out_1: 89
    dense_layers: 2
    dense_out_1: 124
    dropout_1: 0.22910426774976667
    dense_out_2: 59
    dropout_2: 0.2321420275948466
    lr: 0.0007271347627868801
    optimizer: Adam
