In [1]:
from tensorflow import keras
from keras import layers
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the training arrays from the .npz archive. np.load returns a lazy
# NpzFile that keeps the underlying file open, so read both arrays inside a
# `with` block and let the handle close as soon as they are in memory.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code; only load archives from a trusted source, and drop
# the flag if 'X' and 'y' turn out to be plain numeric arrays.
with np.load('../../data/othello_train_data.npz', allow_pickle=True) as data:
    X, y = data['X'], data['y']

# Replace every entry equal to 3 with 0, leaving all other values unchanged.
# NOTE(review): presumably 3 is a marker that should be treated as an empty
# square; confirm against the code that generated the dataset.
X = np.where(X == 3, 0, X)



In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# MinMaxScaler works on 2-D input, so flatten each sample to a 64-value row.
train_flat = X_train.reshape(-1, 64)
val_flat = X_val.reshape(-1, 64)

# Fit the scaler on the training rows only, then apply the same mapping to
# both splits so no statistics from the held-out rows leak into the fit.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(train_flat)

# Restore the 8x8 layout after scaling.
X_train = scaler.transform(train_flat).reshape(-1, 8, 8)
X_val = scaler.transform(val_flat).reshape(-1, 8, 8)

In [4]:
# Sanity check: report the array shapes, then show one scaled training sample
# next to its label.
SAMPLE_INDEX = 50

print("Training data shape:", X_train.shape, y_train.shape)
print("-" * 10)

sample_board = X_train[SAMPLE_INDEX]
print(sample_board)
print(f'This game winner was {y_train[SAMPLE_INDEX]}')

Training data shape: (47104, 8, 8) (47104,)
----------
[[ 1.  1. -1.  0.  1.  0. -1.  1.]
 [-1.  1.  0.  1.  1.  1.  0.  1.]
 [-1. -1.  1.  0.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  0.  0.  1.]
 [-1. -1.  1.  1.  1.  1.  0.  1.]
 [-1. -1.  1.  1.  1.  0.  0.  0.]
 [ 0.  0.  0.  1.  1.  1. -1.  1.]
 [-1.  0. -1.  0. -1.  1. -1. -1.]]
This game winner was -1


In [None]:
# Small convolutional regressor: two Conv -> BatchNorm -> ReLU stages followed
# by a regularised dense head with a single tanh output (range -1..1).
network = keras.Sequential()
network.add(layers.Input(shape=(8, 8, 1)))

# Convolutional stage 1 ('same' padding keeps the 8x8 spatial size).
network.add(layers.Conv2D(32, (3, 3), padding='same'))
network.add(layers.BatchNormalization())
network.add(layers.ReLU())

# Convolutional stage 2.
network.add(layers.Conv2D(32, (3, 3), padding='same'))
network.add(layers.BatchNormalization())
network.add(layers.ReLU())

# Dense head: L2 weight penalty and dropout for regularisation.
network.add(layers.Flatten())
network.add(
    layers.Dense(
        64,
        activation='relu',
        kernel_regularizer=keras.regularizers.l2(0.001),
    )
)
network.add(layers.Dropout(0.3))
network.add(layers.Dense(1, activation='tanh'))

# Train as a regression problem: MSE loss, R^2 reported as a metric.
adam = keras.optimizers.Adam(learning_rate=0.001)
network.compile(optimizer=adam, loss='mse', metrics=['r2_score'])

In [6]:
# Stop training once the validation loss has not improved for 10 epochs and
# roll the model back to the best weights seen. 'val_loss' is the callback's
# documented default monitor; it is spelled out here for clarity.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate when validation R^2 stops improving for 5 epochs.
# R^2 is a higher-is-better metric, so the direction must be set explicitly:
# with mode='auto', Keras releases that infer direction from the metric name
# treat anything without "acc" in it as a quantity to minimise, which would
# cut the learning rate while the model is still getting better.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(
    monitor='val_r2_score',
    mode='max',
    factor=0.5,
    patience=5,
    min_lr=1e-5,
    verbose=1
)

# validation_split=0.2 makes Keras set aside the last 20% of X_train/y_train
# for the per-epoch validation metrics; X_val/y_val are not used during fit.
history = network.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop, lr_scheduler],
)

Epoch 1/200
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.8542 - r2_score: 0.0336 - val_loss: 0.7115 - val_r2_score: 0.1902 - learning_rate: 0.0010
Epoch 2/200
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6798 - r2_score: 0.2327 - val_loss: 0.6308 - val_r2_score: 0.3186 - learning_rate: 0.0010
Epoch 3/200
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.5864 - r2_score: 0.3814 - val_loss: 0.5582 - val_r2_score: 0.4319 - learning_rate: 0.0010
Epoch 4/200
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.5116 - r2_score: 0.4912 - val_loss: 0.5276 - val_r2_score: 0.4843 - learning_rate: 0.0010
Epoch 5/200
[1m1178/1178[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.4735 - r2_score: 0.5525 - val_loss: 0.4967 - val_r2_score: 0.5298 - learning_rate: 0.0010
Epoch 6/200
[1m1161/1178[0m [32m━━━━━━━━━━━━━━━━━━━

In [7]:
# Score the trained model on the held-out split; the cell output is the list
# [loss, r2_score] in the order given to compile().
network.evaluate(x=X_val, y=y_val)

[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.2604 - r2_score: 0.7239


[0.26870667934417725, 0.7146536707878113]

In [None]:
# Uncomment the line below to save the model
#network.save("../../models/model_v2.keras")