In [1]:
from play import Simulation

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [3]:
def make_agent(winner_moves, model_name):
    """Train a move-prediction network on recorded moves and wrap it as an agent.

    Args:
        winner_moves: Non-empty sequence of ``(board, move)`` pairs. Each board
            is an array with at least two dimensions; ``move`` is an integer
            used as the class label and must be a valid column index of the
            board (``0 <= move < board.shape[1]``).
        model_name: File path the trained Keras model is saved to. A missing
            parent directory is created.

    Returns:
        A function ``agent(board_state)`` that flattens ``board_state``, runs
        it through the trained model and returns the index of the
        highest-scoring output.

    Raises:
        ValueError: If ``winner_moves`` is empty.
    """
    import os  # local import: only needed to create the save directory

    if len(winner_moves) == 0:
        raise ValueError('winner_moves is empty; cannot train an agent')

    # Make x, y sets
    boards = np.array([move[0] for move in winner_moves])
    moves = np.array([move[1] for move in winner_moves], dtype=int)
    n_samples = boards.shape[0]
    # One output class per board column; the same width is used for the
    # one-hot targets and for the network's output layer so they cannot drift.
    n_actions = boards[0].shape[1]

    y_one_hot = np.zeros((n_samples, n_actions))
    y_one_hot[np.arange(n_samples), moves] = 1

    # Flatten x (transform the matrix representation of the board into a one-dimensional vector)
    x = boards.reshape(n_samples, -1)

    # Make train-validation sets (roughly 80% / 20%, chosen at random)
    validation_idx = np.random.random(n_samples) < 0.2
    train_idx = np.logical_not(validation_idx)
    if not train_idx.any():
        # A tiny data set can land entirely in the validation split;
        # train on everything rather than on nothing.
        train_idx[:] = True
        validation_idx[:] = False
    x_train = x[train_idx]
    y_train = y_one_hot[train_idx]
    # Keras cannot validate on zero samples, so skip validation in that case.
    validation_data = None
    if validation_idx.any():
        validation_data = (x[validation_idx], y_one_hot[validation_idx])

    # Train a model
    model = keras.Sequential(
        layers=[
            keras.layers.Flatten(input_shape=x.shape[1:]),
            keras.layers.Dense(units=50, activation='relu'),
            keras.layers.Dense(units=n_actions, activation='softmax')
        ]
    )
    model.compile(
        # Cross-entropy is the matching loss for a softmax output trained on
        # one-hot targets; mean squared error gives weak gradients here.
        loss='categorical_crossentropy',
        optimizer='sgd',
        metrics=['accuracy']
    )
    model.fit(
        x=x_train,
        y=y_train,
        batch_size=32,
        epochs=10,
        validation_data=validation_data
    )

    # Save model
    save_dir = os.path.dirname(model_name)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    model.save(model_name)

    # Make an agent
    def agent(board_state):
        """Return the index of the model's highest-scoring move for ``board_state``."""
        reshaped_board = board_state.reshape((1, board_state.size))
        # verbose=0 keeps per-move predictions from flooding the notebook output.
        prediction_array = model.predict(reshaped_board, verbose=0)
        move = np.argmax(prediction_array)
        return move

    return agent

# Run the simulation: play games, then train a new agent each generation

In [None]:
# Generational loop: play games with the current agent, collect the winners'
# moves, then train a fresh model on them for use in the next generation.
agent = None  # no trained agent exists before the first generation
for generation in range(20):
    model_path = 'models/model_{}.h5'.format(generation)
    print('\nGeneration {}'.format(generation))

    print('\tplaying...')
    simulation = Simulation(loops=100, agent=agent, exploration=0.2)
    simulation.start()

    winner_moves = simulation.get_winner_moves()
    n_winner_moves = len(winner_moves)
    print('\twinner moves: {}'.format(n_winner_moves))

    print('\ttraining...')
    trained_agent = make_agent(winner_moves, model_path)
    # NOTE(review): the trained agent is handed to the next Simulation wrapped
    # in a one-element list -- confirm Simulation expects a list of agents
    # rather than a bare callable.
    agent = [trained_agent]


Generation 0
	playing...
	winner moves: 30623
	training...
Train on 24529 samples, validate on 6094 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Generation 1
	playing...
