In [66]:
import numpy as np
from scipy.signal import convolve2d
import tensorflow as tf
from tensorflow import keras

# Environment report: print the TensorFlow version string and the number of
# GPU devices that TensorFlow lists.
physical_devices = tf.config.list_physical_devices(device_type='GPU')
print(tf.version.VERSION)
print("Num GPUs:", len(physical_devices))

# Number of training and test episodes.
train_episodes, test_episodes = 50, 10

# Epsilon-greedy exploration: a random action is taken with probability epsilon.
# Epsilon is initialized at 1, so every step is random at the start
# (an epsilon of 1 means we are exploring all the time).
max_epsilon = 1
min_epsilon = 0.01
epsilon = max_epsilon
decay = 0.01

# Discount rate for future rewards.
gamma = 0.9

# Update the target network every 10 episodes.
target_update_episodes = 10

# Learning rate handed to the Adam optimizers when the networks are compiled.
learning_rate = 0.001

# NOTE(review): presumably the minibatch size for training updates; it is not
# used in the cells visible here -- confirm against the training loop.
batch_size = 128

#Defining Models
def build_q_network(n_inputs=42, n_actions=7):
    """Build and compile one dense network: n_inputs -> 24 -> 12 -> n_actions.

    The online network and the target network must share one architecture so
    that weights can be copied between them with set_weights(); building both
    through this single function keeps the two definitions from drifting apart.

    Args:
        n_inputs: Length of the input vector. The default 42 equals the cell
            count of the 6x7 board (presumably the flattened board -- confirm
            against the code that feeds the network).
        n_actions: Number of linear output units. The default 7 equals the
            number of board columns (presumably one Q-value per column --
            confirm against the training loop).

    Returns:
        A compiled keras.Sequential model (MSE loss, Adam optimizer using the
        module-level learning_rate, 'mse' metric).
    """
    network = keras.Sequential()
    network.add(keras.layers.Dense(24, input_shape=(n_inputs,), activation='relu'))
    network.add(keras.layers.Dense(12, activation='relu'))
    network.add(keras.layers.Dense(n_actions, activation='linear'))
    network.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=['mse'],
    )
    return network


model = build_q_network()

#define target model
target_model = build_q_network()

# Start the target network as an exact copy of the online network.
target_model.set_weights(model.get_weights())

# Shared game grid: 6 rows by 7 columns, all cells initially 0.
board = np.zeros(shape=(6, 7), dtype=int)

# Four-in-a-row patterns, one per direction: a 1x4 row, a 4x1 column and the
# two 4x4 diagonals (main diagonal and its left-right mirror image).
horizontal_kernel = np.array([[1] * 4])
vertical_kernel = horizontal_kernel.T
diag1_kernel = np.identity(4, dtype=np.uint8)
diag2_kernel = diag1_kernel[:, ::-1]
detection_kernels = [
    horizontal_kernel,
    vertical_kernel,
    diag1_kernel,
    diag2_kernel,
]

def check_for_winning_move(board, player):
    """Report whether `player` occupies four aligned cells on `board`.

    The boolean mask of cells equal to `player` is convolved (mode="valid")
    with every kernel in the module-level `detection_kernels`; an output value
    of 4 means all four cells under that kernel belong to `player`.

    Args:
        board: 2-D array of cell values.
        player: Cell value that identifies the player to check.

    Returns:
        True if any kernel finds a full match, otherwise False.
    """
    occupied = board == player
    return any(
        (convolve2d(occupied, pattern, mode="valid") == 4).any()
        for pattern in detection_kernels
    )

def make_move(player, move):
    """Drop a piece for `player` into column `move` of the module-level `board`.

    Rows are scanned from index 5 up to index 0 and the first cell holding 0
    receives the piece, so a column fills from the bottom row upwards.

    Args:
        player: Value written into the chosen cell.
        move: Column index to play.

    Returns:
        True if a cell was filled, False if rows 0-5 of that column hold no 0.
    """
    for row in reversed(range(6)):
        if board[row, move] != 0:
            continue  # cell already taken, try the row above
        board[row, move] = player
        return True
    return False
            
def random_agent():
    """Play one uniformly random legal move for the opponent (player 2).

    Only columns that still have room are candidates. Drawing from all columns
    and ignoring make_move's result would make the opponent silently skip its
    turn whenever the drawn column happens to be full.

    Returns:
        True if a piece was placed, False if every column of the module-level
        `board` is already full (the board is left untouched in that case).
    """
    # make_move fills a column from the bottom row upwards, so a column has
    # room exactly when its top cell (row 0) is still 0.
    open_columns = np.flatnonzero(board[0] == 0)
    if open_columns.size == 0:
        return False
    return make_move(2, int(np.random.choice(open_columns)))

2.8.0
Num GPUs: 1


In [64]:
# Start this cell from an empty 6x7 grid.
board = np.zeros(shape=(6, 7), dtype=int)

for i in range(train_episodes):

    # 50% chance of the opponent going first.
    if np.random.randint(0, 2) == 1:
        random_agent()

    # Scaffold: show the opening position of the first episode, then stop.
    print(board)
    break

[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]


In [67]:
# Manual check of make_move: play column 0 ten times on a fresh 6x7 grid and
# print the returned flag followed by the grid after every attempt.
board = np.zeros(shape=(6, 7), dtype=int)
for i in range(10):
    print(make_move(player=1, move=0), board, sep="\n")

True
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
True
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
True
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
True
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
True
[[0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
True
[[1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
False
[[1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
False
[[1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
False
[[1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0]]
False
[[1 0 0 0 0 0 0]
 [