In [3]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
# Load the Connect 4 training table. Per the preview below, it holds an
# "Unnamed: 0" column, outcome/moves columns, the flattened board cells
# ("board", "-", "-.1", ...) and the policy entries ("policy", "-.83", ...).
# index_col=False tells pandas not to use the first CSV column as the row index.
data = pd.read_csv('../datasets/connect4data.csv', index_col=False)
data.head()

Unnamed: 0,outcome,moves,board,-,-.1,-.2,-.3,-.4,-.5,-.6,...,-.80,-.81,-.82,policy,-.83,-.84,-.85,-.86,-.87,-.88
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.024,0.038,0.058,0.758,0.057,0.038,0.025
1,1,1,0,0,0,0,0,0,0,0,...,0,0,1,0.039,0.054,0.068,0.636,0.105,0.059,0.039
2,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0.038,0.044,0.114,0.506,0.193,0.063,0.042
3,1,2,0,0,0,0,0,0,0,0,...,1,1,0,0.038,0.054,0.214,0.529,0.043,0.081,0.041
4,1,2,0,0,0,0,0,0,0,0,...,0,0,1,0.045,0.05,0.346,0.361,0.051,0.082,0.065


In [5]:
# Number of examples (rows) in the dataset.
# len() is used instead of `.shape[0]` so this cell still works when it is
# re-run after `data` has been rebound from the DataFrame to the list of
# (state, policy) pairs later in the notebook; both have the same length.
ENTRIES = len(data)

In [6]:
# Column layout of the CSV: the 84 flattened board values live under
# "board", "-", "-.1", ..., "-.82" and the 7 policy values under
# "policy", "-.83", ..., "-.88".
STATE_COLUMNS = ["board", "-"] + [f"-.{i}" for i in range(1, 83)]
POLICY_COLUMNS = ["policy"] + [f"-.{i}" for i in range(83, 89)]

states = np.array(data[STATE_COLUMNS])      # one row of 84 values per example
policies = np.array(data[POLICY_COLUMNS])   # one row of 7 values per example

# Policy of the first dataset row; plotted later as the reference distribution
# for the starting position.
firstpolicy = policies[0]

In [7]:
# View every flat 84-value row as a 6 x 7 x 2 tensor, matching the
# (None, 6, 7, 2) input of the CNN.
# NOTE(review): assumes the CSV columns were flattened in (6, 7, 2) C-order -
# confirm against the code that generated the dataset.
states = np.reshape(states, (ENTRIES, 6, 7, 2))

In [8]:
# Pair each board tensor with its target policy so the two stay aligned when
# the examples are shuffled. Note that this rebinds `data`, which referred to
# the DataFrame up to this point.
data = [(state, policy) for state, policy in zip(states, policies)]

In [9]:
# Shuffle the examples and split them 80 / 20 into train and test sets.
#
# A seeded generator makes the split reproducible across kernel restarts, and
# indexing through a permutation (rather than shuffling `data` in place) makes
# the cell idempotent: re-running it always yields the same split.
# NOTE(review): the split is per row; if several rows come from the same game,
# closely related positions can land on both sides - confirm this is acceptable.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

split_rng = np.random.default_rng(SPLIT_SEED)
shuffled_order = split_rng.permutation(len(data))
split_at = int(len(data) * TRAIN_FRACTION)

train_data = [data[i] for i in shuffled_order[:split_at]]
test_data = [data[i] for i in shuffled_order[split_at:]]

In [10]:
# Build the network via the project-local `net` module and print its layer
# summary (the output below shows a (None, 6, 7, 2) input feeding Conv2D layers).
from net import get_model
model = get_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 6, 7, 2)]    0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 6, 7, 128)    2432        ['input[0][0]']                  
                                                                                                  
 conv2d_1 (Conv2D)              (None, 6, 7, 128)    147584      ['conv2d[0][0]']                 
                                                                                                  
 conv2d_2 (Conv2D)              (None, 6, 7, 128)    147584      ['conv2d_1[0][0]']               
                                                                                              

In [11]:
# Unzip the (state, policy) pairs back into parallel input / target arrays
# for both splits; the actual training happens in a later cell.
def _stack_field(pairs, field):
    """Collect element `field` of every pair in `pairs` into a single ndarray."""
    return np.array([pair[field] for pair in pairs])


xs, ys = _stack_field(train_data, 0), _stack_field(train_data, 1)
xs_test, ys_test = _stack_field(test_data, 0), _stack_field(test_data, 1)

In [13]:
# TensorBoard logging: each execution of this cell gets its own timestamped
# run directory under logs/fit/.
import datetime

run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(histogram_freq=1, log_dir=log_dir)


In [14]:
# Train on the training split, passing the held-out split as validation data
# and logging through the TensorBoard callback.
model.fit(
    xs,
    ys,
    batch_size=32,
    epochs=500,
    validation_data=(xs_test, ys_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x2613166efa0>

In [None]:
# Evaluate the trained model on the held-out split first, then on the
# training split, and print both results for comparison.
# NOTE(review): the value returned by model.evaluate is printed as-is; whether
# it is a single loss or a list depends on how net.get_model compiles the
# model - verify there.
loss_test = model.evaluate(xs_test, ys_test)
loss_train = model.evaluate(xs, ys)
print(f"Test loss: {loss_test}")
print(f"Train loss: {loss_train}")

In [None]:
# Reference distribution: the dataset's policy for its first row, which is
# what we expect the model to reproduce for the starting position.
# The figure is titled and labelled so it can be read on its own.
fig, ax = plt.subplots()
ax.bar(range(7), firstpolicy)
ax.set(
    title="Dataset policy for the starting position",
    xlabel="Column index",
    ylabel="Policy value",
)
plt.show()

In [None]:
# An all-zero 6 x 7 x 2 tensor, which looks like the starting position.
mock_board = np.zeros((6, 7, 2))

# The model takes a batch, so add a leading batch axis of size 1 and unwrap
# the single prediction.
dist = model.predict(mock_board.reshape(1, 6, 7, 2))[0]

# Plot the predicted distribution as a labelled bar chart.
fig, ax = plt.subplots()
ax.bar(range(7), dist)
ax.set(
    title="Model policy for the empty board",
    xlabel="Column index",
    ylabel="Policy value",
)
plt.show()

In [None]:
# Rebuild the board from scratch so this cell does not depend on in-place
# edits made by earlier cells and gives the same result when re-run.
mock_board = np.zeros((6, 7, 2))
mock_board[5, 3, 0] = 1  # place a piece in column 4 (index 3)

# Add a leading batch axis of size 1 and unwrap the single prediction.
dist = model.predict(mock_board.reshape(1, 6, 7, 2))[0]

# Plot the predicted distribution as a labelled bar chart.
fig, ax = plt.subplots()
ax.bar(range(7), dist)
ax.set(
    title="Model policy after one piece in column index 3",
    xlabel="Column index",
    ylabel="Policy value",
)
plt.show()


In [None]:
# Rebuild the full position from scratch so this cell does not depend on
# in-place edits made by earlier cells and gives the same result when re-run.
mock_board = np.zeros((6, 7, 2))
mock_board[5, 3, 0] = 1  # channel 0: piece in column 4 (index 3), bottom row
mock_board[4, 3, 1] = 1  # channel 1: piece in column 4 (index 3), one row up
mock_board[5, 2, 0] = 1  # channel 0: piece in column 3 (index 2), bottom row
# NOTE(review): which channel belongs to which player (or to the side to move)
# is not visible here - confirm against the dataset encoding.

# Add a leading batch axis of size 1 and unwrap the single prediction.
dist = model.predict(mock_board.reshape(1, 6, 7, 2))[0]

# Plot the predicted distribution as a labelled bar chart.
fig, ax = plt.subplots()
ax.bar(range(7), dist)
ax.set(
    title="Model policy after three pieces (column indices 3, 3, 2)",
    xlabel="Column index",
    ylabel="Policy value",
)
plt.show()


In [None]:
# Uncomment to save the trained model to disk:
# model.save('direct_conv_policy.h5')