### Imports

In [None]:
import random
from typing import List, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.callbacks as callbacks
import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
import tensorflow.keras.optimizers as optimizers
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Two nested lists of ints.
Board = List[List[int]]
# Three nested lists of ints, i.e. a list of Board-shaped grids.
ThreeDimArr = List[Board]

### Generate dataset for training model


In [None]:
def evaluate_state(state: State) -> int:
    """Score a position with a fixed table of per-square weights.

    A square holding a ``State.BLACK`` disk adds its weight to the total and
    a square holding a ``State.WHITE`` disk subtracts it, so positive totals
    favour black and negative totals favour white. Any other square
    contributes nothing.

    Args:
        state: Position to score; ``state.board`` is indexed ``[row][col]``.

    Returns:
        The signed sum of the weights of all occupied squares.
    """
    # The table is mirror-symmetric top-to-bottom, so each distinct row is
    # written once and reused. Corners carry the largest bonus (100) and the
    # squares diagonally next to them the largest penalty (-50).
    edge_row = (100, -25, 50, 10, 10, 50, -25, 100)
    second_row = (-25, -50, -10, -5, -5, -10, -50, -25)
    third_row = (50, -10, 5, 1, 1, 5, -10, 50)
    centre_row = (10, -5, 1, -1, -1, 1, -5, 10)
    square_weights = (
        edge_row,
        second_row,
        third_row,
        centre_row,
        centre_row,
        third_row,
        second_row,
        edge_row,
    )

    total = 0
    squares = ((r, c) for r in range(State.SIZE) for c in range(State.SIZE))
    for r, c in squares:
        disk = state.board[r][c]
        if disk == State.BLACK:
            total += square_weights[r][c]
        if disk == State.WHITE:
            total -= square_weights[r][c]

    return total

def generate_dataset(num_data: int = 100, normalize: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """Build a training set of random positions labelled by the heuristic.

    Each sample is produced by drawing a position with ``get_random_state``,
    encoding it with ``reshape_board`` and scoring it with ``evaluate_state``.
    The sample index is printed every 100 samples as a progress indicator.

    Args:
        num_data: Number of samples to generate.
        normalize: When true, min-max scale the scores to the range [0, 1].
            The scaled scores are returned as a column of shape
            ``(num_data, 1)``; the fitted scaler itself is not returned.

    Returns:
        A ``(X_train, y_train)`` pair: the stacked encoded boards and their
        scores. Without normalisation the scores are a 1-D array of raw
        heuristic values.
    """
    boards, scores = [], []
    for i in range(num_data):
        if i % 100 == 0:
            print(i)
        state = get_random_state()
        boards.append(reshape_board(state))
        scores.append(evaluate_state(state))

    X_train = np.array(boards)
    y_train = np.array(scores)

    if normalize:
        # MinMaxScaler expects a 2-D (samples, features) array, hence the
        # reshape of the score vector into a single column.
        y_train = MinMaxScaler().fit_transform(y_train.reshape(-1, 1))

    return X_train, y_train

def get_random_state(max_depth: int = 30) -> State:
    """Return a position reached by playing a random number of random moves.

    Starting from ``State()``, between 0 and ``max_depth - 1`` moves are
    played, each chosen uniformly from ``state.valid_moves()``. The walk
    stops early when the next position would end the game (that finished
    position is discarded) or when there is no valid move to choose from.

    Args:
        max_depth: Exclusive upper bound on the number of random moves. It is
            passed to ``random.randrange``, which raises ``ValueError`` for
            values below 1.

    Returns:
        The last position reached before the walk stopped.
    """
    state = State()
    depth = random.randrange(max_depth)

    for _ in range(depth):
        valid_moves = state.valid_moves()
        # random.choice raises IndexError on an empty sequence, so stop here
        # and return the current position when no move is available.
        if len(valid_moves) == 0:
            break
        candidate = state.place_disk(random.choice(valid_moves))
        if candidate.game_over():
            break
        state = candidate

    return state


### Encode a state as a stack of board feature planes

In [None]:
def reshape_board(state: State) -> np.ndarray:
    """Encode a position as a stack of four binary feature planes.

    Planes, indexed along the first axis:
        0. squares holding a ``State.BLACK`` disk
        1. squares holding a ``State.WHITE`` disk
        2. squares listed by ``state.valid_moves()``
        3. squares for which ``state.is_disk_vulnerable`` is truthy

    Args:
        state: Position to encode.

    Returns:
        An ``int8`` array of shape ``(4, State.SIZE, State.SIZE)`` whose
        entries are 0 or 1.
    """
    size = State.SIZE
    planes = np.zeros((4, size, size), dtype=np.int8)

    # Planes 0 and 1: disk ownership.
    for r, row in enumerate(state.board):
        for c, cell in enumerate(row):
            if cell == State.BLACK:
                planes[0, r, c] = 1
            elif cell == State.WHITE:
                planes[1, r, c] = 1

    # Plane 2: squares that are currently valid moves.
    for r, c in state.valid_moves():
        planes[2, r, c] = 1

    # Plane 3: squares reported as vulnerable.
    for r in range(size):
        for c in range(size):
            if state.is_disk_vulnerable((r, c)):
                planes[3, r, c] = 1

    return planes


### Build model

In [None]:
# Generate 10,000 labelled positions; normalize=True min-max scales the scores.
X_train, y_train = generate_dataset(num_data=10_000, normalize=True)

In [None]:
def build_model():
    """Build, compile and train the CNN that approximates the heuristic score.

    The network accepts the planes-first ``(4, State.SIZE, State.SIZE)``
    encoding as input and emits a single sigmoid value per position. Training
    data is read from the module-level ``X_train`` and ``y_train``.

    Returns:
        The fitted ``tf.keras.Sequential`` model.
    """
    size = State.SIZE
    model = tf.keras.Sequential([
        # The encoded boards are planes-first (4, rows, cols), while the conv
        # and pooling layers below run channels_last. Move the plane axis to
        # the end so the convolutions slide over the board squares and treat
        # the four planes as channels.
        tf.keras.layers.Permute((2, 3, 1), input_shape=(4, size, size)),
        tf.keras.layers.Conv2D(filters=32,
                               kernel_size=3,
                               padding="same",
                               activation="relu",
                               data_format="channels_last"),
        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same",
                               activation="relu", data_format="channels_last"),
        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same",
                               activation="relu", data_format="channels_last"),
        tf.keras.layers.MaxPool2D(pool_size=2, padding="valid",
                                  data_format="channels_last"),
        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding="same",
                               activation="relu", data_format="channels_last"),
        tf.keras.layers.MaxPool2D(2, data_format="channels_last"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation="relu"),
        # Sigmoid bounds the output to (0, 1), which fits targets that were
        # min-max scaled to [0, 1].
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    model.compile(loss=tf.keras.losses.mae,
                  optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4),
                  metrics=["mae"])

    # NOTE(review): both callbacks watch the training loss even though a
    # validation split is held out - confirm that 'val_loss' is not intended.
    model.fit(X_train, y_train,
              batch_size=2048,
              epochs=1000,
              verbose=1,
              validation_split=0.1,
              callbacks=[callbacks.ReduceLROnPlateau(monitor='loss', patience=10),
                         callbacks.EarlyStopping(monitor='loss', patience=15, min_delta=1e-4)])

    return model

# Build and train the network; `model` is the module-level instance that
# NeuralNetworkAgent.evaluate reads.
model = build_model()

### Test model

from tensorflow.keras import models

# model = models.load_model('not_normalized.h5')

# Agent that uses a neural network to (attempt to) compute the optimal move
class NeuralNetworkAgent(Agent):
    """Agent whose position evaluation comes from the trained network."""

    def evaluate(self, state: State) -> float:
        """Return the network's score for ``state`` as a Python float.

        The position is encoded with ``reshape_board``, given a leading batch
        axis of size 1 and passed through the module-level ``model``.
        """
        batch = np.expand_dims(reshape_board(state), 0)
        # Calling the model yields a tensor; take its single entry and unwrap
        # it so callers get a plain number rather than a TensorFlow scalar.
        return float(model(batch)[0][0])

    def __str__(self) -> str:
        """Return the agent's display name."""
        return "Neural Network"

# Smoke test: draw a random position and ask the agent for a move.
state = get_random_state()
print(state)
neural_network_agent = NeuralNetworkAgent()
# get_action is not defined on NeuralNetworkAgent; presumably it is inherited
# from Agent and uses evaluate() internally - confirm against Agent.
move = neural_network_agent.get_action(state)
print(move)

In [None]:
# Print the layer-by-layer description of the trained network.
model.summary()

# Persist the trained model to disk (the .h5 suffix suggests HDF5 - confirm
# against the installed Keras version).
model.save('1000_normalized.h5')