In [20]:
from evoframe.reward_builders import RewardBuilderGame
from evoframe.population_update_builders import PopulationUpdateBuilderStatic
from evoframe.selector_function import SelectorFunctionFactory
from evoframe import EvolutionBuilder
from evoframe.models import FeedForwardNetwork
from evoframe.models import ActivationFunctions
from evoframe.games import Game
from evoframe import get_agent_wrapper_func

import numpy as np
import pandas as pd

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from evoframe.experiment_results import *
from evoframe.utility import clean_experiment_directory

%matplotlib inline

In [21]:
from evoframe.reward_builders.reward_builder_game import TournamentMode
from evoframe.games import Tris

In [22]:
# Game: a brand-new Tris instance is built on every call.
def game_creation_func(context):
    """Create a new Tris game; the supplied ``context`` is not read."""
    return Tris()


# Model: sizes handed to FeedForwardNetwork. predict_func feeds it the
# flattened 3x3 board (9 values) and reads 9 values back.
layer_sizes = [9, 27, 18, 9]


def get_model_func():
    """Build a FeedForwardNetwork over ``layer_sizes`` with sigmoid for both activation arguments."""
    first_activation = ActivationFunctions.get_sigmoid()
    second_activation = ActivationFunctions.get_sigmoid()
    return FeedForwardNetwork(layer_sizes, first_activation, second_activation)

# Game-Model interface
def predict_func(model, game):
    """Sample one move from the model's output and return it one-hot encoded.

    ``game.board`` is flattened and passed to ``model.predict``; the first
    element of the result is used as a score vector. The scores are turned
    into probabilities with a softmax and a single index is drawn at random
    with those probabilities, so the chosen move is stochastic rather than
    the argmax.

    Returns a list of nine ints: 1 at the sampled index, 0 elsewhere.
    """
    scores = model.predict(game.board.flatten())[0]
    # Subtract the maximum before exponentiating so np.exp cannot overflow.
    exp_scores = np.exp(scores - np.max(scores))
    move_probs = exp_scores / exp_scores.sum()
    sampled = np.random.choice(np.arange(len(move_probs)), p=move_probs)
    return [int(cell == sampled) for cell in range(9)]
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and context function.
# Tris is a two-player game, so the reward is obtained from a competitive
# tournament, configured here with TournamentMode.VS_BESTS_RANDOM.
keep_only = 50
reward_builder = (
    RewardBuilderGame()
    .with_game_creation_func(game_creation_func)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_competitive_tournament(TournamentMode.VS_BESTS_RANDOM)
    .with_keep_only(keep_only)
)
reward_func, get_context_func = reward_builder.get()


# Update population function.
# Eight "es_n_rewards_gradient" operators are registered. The first numeric
# argument is always 0.1, the second sweeps (0.1, 0.3, 0.6, 1) and the third
# is 1. for the first four operators and 0.5 for the last four.
population_builder = PopulationUpdateBuilderStatic()
for third_arg in (1., 0.5):
    for second_arg in (0.1, 0.3, 0.6, 1):
        population_builder = population_builder.add_operator(
            "es_n_rewards_gradient", 0.1, second_arg, third_arg
        )
get_new_pop_func = (
    population_builder
    .add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .get()
)

# Evolution function: combines the model factory, the reward function, the
# population update and the context function defined above.
evolution_builder = (
    EvolutionBuilder()
    .with_get_model_func(get_model_func)
    .with_reward_func(reward_func)
    .with_get_new_pop_func(get_new_pop_func)
    .with_get_context_func(get_context_func)
)
evolution_func = evolution_builder.get()

# Run settings; the analysis cells further down read these names as well.
experiment_name = "tris"
pop_size = 100
num_epochs = 50

# NOTE(review): going by its name, this wipes earlier results stored under
# experiment_name before the new run starts - confirm before re-running.
clean_experiment_directory(experiment_name)
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

Epoch 1, best reward is 26
Epoch 2, best reward is 23
Epoch 3, best reward is 22
Epoch 4, best reward is 20
Epoch 5, best reward is 23
Epoch 6, best reward is 28
Epoch 7, best reward is 25
Epoch 8, best reward is 24
Epoch 9, best reward is 16
Epoch 10, best reward is 25
Epoch 11, best reward is 15
Epoch 12, best reward is 13
Epoch 13, best reward is 19
Epoch 14, best reward is 17
Epoch 15, best reward is 19
Epoch 16, best reward is 17
Epoch 17, best reward is 22
Epoch 18, best reward is 16
Epoch 19, best reward is 19
Epoch 20, best reward is 20
Epoch 21, best reward is 20
Epoch 22, best reward is 20
Epoch 23, best reward is 19
Epoch 24, best reward is 19
Epoch 25, best reward is 21
Epoch 26, best reward is 16
Epoch 27, best reward is 18
Epoch 28, best reward is 21
Epoch 29, best reward is 14
Epoch 30, best reward is 15
Epoch 31, best reward is 15
Epoch 32, best reward is 15
Epoch 33, best reward is 13
Epoch 34, best reward is 13
Epoch 35, best reward is 16
Epoch 36, best reward is 17
E

In [None]:
# Settings read by the analysis cells below.
# NOTE(review): num_epochs is 500 here, while the training cell above runs 50
# epochs under the same experiment_name. Confirm that results for epoch 500
# exist for "tris" before running the cells that load a model by epoch.
pop_size = 100
num_epochs = 500
experiment_name = "tris"


# Game-Model interface
def predict_func(model, game):
    """Return the model's raw output for the current board.

    ``game.board`` is flattened and passed to ``model.predict``; the first
    element of the result is returned unchanged (no softmax, no sampling).
    """
    # In Tris the board is a 3x3 np.array, so it is flattened before prediction.
    # Per the original author's note, the cell with the highest value that
    # corresponds to a valid move becomes the chosen move.
    return model.predict(game.board.flatten())[0]


agent_wrapper_func = get_agent_wrapper_func(predict_func)

In [23]:
plot_rewards(experiment_name)

In [24]:
# The epoch is picked with a slider running from 1 to num_epochs;
# experiment_name is passed through as a fixed (non-widget) argument.
epoch_slider = widgets.IntSlider(min=1, max=num_epochs, step=1, value=1)
interact(
    show_best_fnn_weights,
    experiment_name=fixed(experiment_name),
    epoch=epoch_slider,
)

interactive(children=(IntSlider(value=1, description='epoch', max=50, min=1), Checkbox(value=True, description…

<function evoframe.experiment_results.show_best_fnn_weights(experiment_name, epoch, with_bias=True)>

In [25]:
import plotly.express as px

def show_predictions(model, inp):
    """Plot the agent's output for board ``inp`` as a 3x3 heatmap.

    A fresh Tris game gets ``inp`` assigned as its board, ``model.predict``
    is called with that game, and the result is reshaped to 3x3. Each cell
    becomes one (row, column, value) record, with row and column numbered
    from 1.

    NOTE(review): the column labels "neuron_input"/"neuron_output" actually
    hold the board row and column here; they are left untouched because they
    appear as axis titles in the figure.

    Returns the plotly figure created by ``px.density_heatmap``.
    """
    game = Tris()
    game.board = inp
    scores = np.array(model.predict(game)).reshape((3, 3))
    n_rows, n_cols = scores.shape

    records = []
    for r in range(n_rows):
        for c in range(n_cols):
            records.append((r + 1, c + 1, scores[r][c]))
    df = pd.DataFrame(data=records, columns=["neuron_input", "neuron_output", "value"])

    # One bin per board cell; the axis ranges are padded by 0.5 on each side.
    return px.density_heatmap(
        df, x="neuron_output", y="neuron_input", z="value",
        histfunc="sum", color_continuous_scale="RdYlGn", range_color=(-2, 2),
        nbinsx=n_cols, nbinsy=n_rows,
        range_x=(0.5, n_cols + 0.5), range_y=(0.5, n_rows + 0.5),
    )

In [26]:
def get_random_input_func():
    """Return a random 3x3 integer array with entries drawn from -1, 0 and 1.

    Each cell is drawn independently, row by row, so the result is not
    guaranteed to be a position that can occur in a real game.
    """
    cell_values = [-1, 0, 1]
    rows = []
    for _ in range(3):
        rows.append([np.random.choice(cell_values) for _ in range(3)])
    return np.array(rows)

In [27]:
# Load the pickled best model for epoch `num_epochs` and wrap it as an agent,
# then show its output on one random board.
best_model = pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size)
model = agent_wrapper_func(best_model)
inp = get_random_input_func()
print(inp)
show_predictions(model, inp)

[[ 0 -1  1]
 [-1 -1  1]
 [ 0 -1 -1]]


In [28]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="first_best")

In [29]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="last_best")

In [30]:
plot_behavioural_variances_to_input(experiment_name, get_random_input_func)

In [31]:
figs = plot_params_statistics(experiment_name)

In [32]:
figs[0]

In [33]:
figs[1]

In [34]:
figs[2]

In [35]:
figs[3]

In [36]:
plot_operators_statistics_means(experiment_name)

In [37]:
plot_operators_statistics_max(experiment_name)

In [38]:
plot_tournament(experiment_name, agent_wrapper_func, Tris, 1)[0]

In [None]:
class AgentHuman:
    """Agent whose moves are typed in by a person on standard input."""

    def predict(self, game):
        """Prompt for a move and return it as a one-hot list of nine ints.

        The prompt is repeated until the reply parses as an integer in the
        range 0..8, so a typo no longer aborts the game with a ValueError and
        an out-of-range number no longer produces an all-zero move.

        ``game`` is accepted to match the agent interface and is not read.
        """
        while True:
            reply = input("Select move: ")
            try:
                move = int(reply)
            except ValueError:
                print("Please enter a whole number between 0 and 8.")
                continue
            if 0 <= move < 9:
                return [1 if i == move else 0 for i in range(9)]
            print("Please enter a whole number between 0 and 8.")

In [None]:
# Interactive match: the human agent is passed first, the agent wrapped
# around the loaded best model of epoch `num_epochs` second.
game = Tris()
human_agent = AgentHuman()
trained_agent = agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size))
game.play(human_agent, trained_agent, interactive=True)