In [26]:
import numpy as np
import pandas as pd

Values and functions used to process a 64-character string as if it were an 8 by 8 array.
The board is represented as a 64-character string stored row by row (+ is black, - is white, 0 is an empty space).

In [89]:
# Starting position: the four centre squares are occupied, everything else is empty.
initial_board = '000000000000000000000000000-+000000+-000000000000000000000000000'

# Conversion values

# Display names indexed by player code: 0 -> 'Draw', 1 -> 'Black',
# and -1 -> 'White' (reached through negative indexing).
who = ('Draw', 'Black', 'White')

# Two-way lookup between board characters and numeric player codes.
marker = {
    '0': 0, '+': 1, '-': -1,
    0: '0', 1: '+', -1: '-',
}

# Numeric encoding of each board character used as a model input feature.
training_value = {'+': 1.0, '-': 0.0, '0': 0.5}

# Column letter (either case) -> zero-based column index.
letter_conv = {
    letter: column
    for letters in ('ABCDEFGH', 'abcdefgh')
    for column, letter in enumerate(letters)
}

# The eight (row, column) steps to neighbouring squares, excluding "stay put".
increments = tuple(
    (d_row, d_col)
    for d_row in (-1, 0, 1)
    for d_col in (-1, 0, 1)
    if (d_row, d_col) != (0, 0)
)

# Convert position formats.

def a1_num(pos):
    """Convert a letter+digit square (e.g. 'f5') to its index in the board string.

    pos[0] is the column letter (looked up in letter_conv) and pos[1] is the
    one-based row digit; the result is row * 8 + column.
    """
    row = int(pos[1]) - 1
    col = letter_conv[pos[0]]
    return row * 8 + col

def a1_rc(pos):
    """Convert a letter+digit square (e.g. 'f5') to a zero-based (row, column) pair.

    pos[0] is the column letter (looked up in letter_conv) and pos[1] is the
    one-based row digit.
    """
    row = int(pos[1]) - 1
    col = letter_conv[pos[0]]
    return row, col

# Convert the character string representation of the board to an array.

def txt_training(brd):
    """Return a list holding the training_value of each character of brd, in order."""
    return [training_value[square] for square in brd]

# Return the value of the board position given row/column coordinates.

def chk(brd, r, c):
    """Return the numeric code of the square at row r, column c.

    The code comes from `marker` (0 empty, 1 for '+', -1 for '-').
    Coordinates outside the 8 x 8 grid yield the sentinel 99, which never
    equals any player code.
    """
    on_board = 0 <= r < 8 and 0 <= c < 8
    if not on_board:
        return 99
    return marker[brd[r * 8 + c]]

# Update the board position given row/column coordinates.

def upd(brd, r, c, player):
    """Return a copy of brd with the square at (r, c) set to player's character.

    Strings are immutable, so the new board is assembled from the text before
    the square, the player's marker character, and the text after the square.
    """
    idx = r * 8 + c
    head, tail = brd[:idx], brd[idx + 1:]
    return head + marker[player] + tail

This is the key logic! In order to know the board configurations in the game logs, you have to 'play' each move.

In [28]:
def move(brd, pos, player):
    """Play `player` (1 or -1) at `pos` and return the resulting board string.

    pos is a (row, column) pair. If the target square is not empty, or no
    direction yields a capture, the board is returned unchanged; callers can
    therefore detect an illegal move by comparing the result to the input.
    """
    row, col = pos
    if chk(brd, row, col) != 0:
        return brd

    opponent = -player
    for d_row, d_col in increments:
        # Length of the unbroken run of opponent discs next to the target square.
        run = 0
        while chk(brd, row + d_row * (run + 1), col + d_col * (run + 1)) == opponent:
            run += 1
        if run == 0:
            continue
        # The run only counts if it is closed off by one of the player's own discs.
        if chk(brd, row + d_row * (run + 1), col + d_col * (run + 1)) != player:
            continue
        # Flip from the far end of the run back to step 0, which places the
        # player's disc on the target square itself.
        for step in range(run, -1, -1):
            brd = upd(brd, row + d_row * step, col + d_col * step, player)

    return brd


Add the board configurations and player information to the game log.
Note: A player can play twice in a row if the other player does not have a valid move.

In [29]:
# Convert numeric player information to text.

def conv_winner(x):
    """Translate a winner code (anything int() accepts) into its name from `who`."""
    code = int(x)
    return who[code]

def conv_log(log):
    """Replay a move log and return one record per move.

    `log` is a string of concatenated two-character squares. Each record is a
    tuple (player name, board before the move, square text, board after the
    move). Play starts from initial_board with player 1.
    """
    board = initial_board
    player = 1
    history = []
    squares = (log[k:k + 2] for k in range(0, len(log), 2))
    for square in squares:
        target = a1_rc(square)
        after = move(board, target, player)
        if after == board:
            # An unchanged board means the move was not legal for this player,
            # so the other player is tried for the same square instead.
            player = -player
            after = move(board, target, player)
        history.append((who[player], board, square, after))
        board = after
        player = -player

    return tuple(history)

Read the historic game logs

In [30]:
# Load the game archive. The file's own header row (header=0) is replaced by
# the names given here; the converters turn the winner code into a name and
# replay every move log into per-move board records while the file is parsed.
historic_game_data = pd.read_csv(
    'othello_dataset.csv',
    header=0,
    names=['eOthello Game ID', 'Winner', 'Log'],
    index_col=['eOthello Game ID'],
    converters={'Winner': conv_winner, 'Log': conv_log},
)
historic_game_data

Unnamed: 0_level_0,Winner,Log
eOthello Game ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1056798,Black,"((Black, 000000000000000000000000000-+000000+-..."
1050515,White,"((Black, 000000000000000000000000000-+000000+-..."
996989,Black,"((Black, 000000000000000000000000000-+000000+-..."
977775,Draw,"((Black, 000000000000000000000000000-+000000+-..."
900860,Black,"((Black, 000000000000000000000000000-+000000+-..."
...,...,...
16872,White,"((Black, 000000000000000000000000000-+000000+-..."
16761,White,"((Black, 000000000000000000000000000-+000000+-..."
16740,Black,"((Black, 000000000000000000000000000-+000000+-..."
16906,Black,"((Black, 000000000000000000000000000-+000000+-..."


Moves of interest are moves that a player made in games that they won.

In [31]:
# Collect (player, board before the move, move index) for every move made by
# the eventual winner of a game. Draws contribute nothing.
# Black-won games are processed before White-won games so the row order of
# the resulting list is the same as with two separate loops.
winning_moves_list = []
for winner in ('Black', 'White'):
    won_logs = historic_game_data.loc[historic_game_data['Winner'] == winner, 'Log']
    for game in won_logs:
        # Each record is (player name, board before, square text, board after).
        for mover, board_before, square, _board_after in game:
            if mover == winner:
                winning_moves_list.append((winner, board_before, a1_num(square)))

# For machine learning:
#   - board values are transformed to values between 0 and 1
#   - move values are transformed to integers between 0 and 63

# Build the training table in one expression: the board strings in
# 'Feature - Board' are replaced by their numeric lists via txt_training.
training_df = (
    pd.DataFrame(winning_moves_list,
                 columns=['Player', 'Feature - Board', 'Label - Move'])
    .assign(**{'Feature - Board':
               lambda frame: frame['Feature - Board'].apply(txt_training)})
)
training_df

Unnamed: 0,Player,Feature - Board,Label - Move
0,Black,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...",37
1,Black,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...",26
2,Black,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...",44
3,Black,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...",20
4,Black,"[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, ...",34
...,...,...,...
764541,White,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, ...",14
764542,White,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, ...",62
764543,White,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, ...",8
764544,White,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",49


Feature and label training arrays.

In [85]:
# Stack the per-row board lists into a single 2-D array (one row per move).
board_rows = training_df['Feature - Board'].tolist()
features = np.vstack(board_rows)
features

array([[0.5, 0.5, 0.5, ..., 0.5, 0.5, 0.5],
       [0.5, 0.5, 0.5, ..., 0.5, 0.5, 0.5],
       [0.5, 0.5, 0.5, ..., 0.5, 0.5, 0.5],
       ...,
       [0. , 0. , 0. , ..., 1. , 0. , 1. ],
       [0. , 0. , 0. , ..., 1. , 0. , 1. ],
       [0. , 0. , 0. , ..., 1. , 0. , 1. ]])

In [79]:
# Move indices as a standalone array (copied so it is independent of training_df).
labels = training_df['Label - Move'].to_numpy(copy=True)

# Train Neural Network

In [72]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

Create model

In [81]:
# Fully connected classifier: 64 board inputs -> 128 -> 64 hidden units, then a
# 64-way softmax so each output corresponds to one move index (0-63).
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu', input_shape=(64,)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='softmax'))

## Data preparations and model training

In [83]:
# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Labels are integer move indices and the model already ends in a softmax,
# hence the sparse categorical loss with from_logits=False.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'],
)

Train model with training data.

In [84]:
# Pass the held-out split so every epoch also reports validation loss and
# accuracy; without it x_val / y_val are never used and overfitting cannot be
# seen from the training log.
model.fit(x_train, y_train, epochs=10, batch_size=32,
          validation_data=(x_val, y_val))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2152ee10130>