In [1]:
from evoframe.reward_builders import RewardBuilderGame
from evoframe.population_update_builders import PopulationUpdateBuilderStatic
from evoframe.selector_function import SelectorFunctionFactory
from evoframe import EvolutionBuilder
from evoframe.models import FeedForwardNetwork
from evoframe.models import ActivationFunctions
from evoframe.games import Game
from evoframe import get_agent_wrapper_func

import numpy as np
import pandas as pd

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from evoframe.experiment_results import *
from evoframe.utility import clean_experiment_directory

%matplotlib inline

In [2]:
from evoframe.reward_builders.reward_builder_game import TournamentMode
from evoframe.games import Tris

In [8]:
# Game
def game_creation_func(context):
    """Return a brand-new Tris game; the `context` argument is accepted but not used."""
    return Tris()

# Model
# 27 inputs matches the 3-way one-hot encoding per board cell built in
# predict_func; the network ends in a single output value.
layer_sizes = [27, 9, 3, 1]
def get_model_func():
    """Build a fresh bias-free FeedForwardNetwork with sigmoid activations.

    `layer_sizes` is read from module scope at call time.
    """
    return FeedForwardNetwork(
        layer_sizes,
        ActivationFunctions.get_sigmoid(),
        ActivationFunctions.get_sigmoid(),
        with_bias=False,
    )

# Game-Model interface
def predict_func(model, game):
    """Sample the next action for `game` using `model` as a state evaluator.

    Every available action is scored by asking the game for the state that
    would follow it, one-hot encoding that board (3 values per cell) and
    feeding the flat vector to ``model.predict``. The scores are turned into
    a probability distribution with a softmax and one action is drawn at
    random from it (the choice is stochastic, not an argmax).

    Args:
        model: object exposing ``predict(vector)``; element ``[0]`` of its
            result must itself be iterable (e.g. a length-1 array).
        game: object exposing ``get_available_actions()`` and
            ``get_next_state(action)``; the latter must return something
            with a ``flatten()`` method.

    Returns:
        The sampled element of ``game.get_available_actions()``.
    """
    def one_hot(cell):
        # 1 -> first slot, 0 -> second slot, anything else -> third slot.
        if cell == 1:
            return [1, 0, 0]
        if cell == 0:
            return [0, 1, 0]
        return [0, 0, 1]

    available_actions = game.get_available_actions()

    scores = []
    for action in available_actions:
        cells = game.get_next_state(action).flatten()
        encoded = np.array([bit for cell in cells for bit in one_hot(cell)])
        scores.append(model.predict(encoded)[0])
    scores = np.array(scores)

    # Softmax over all scores; subtracting the max keeps exp() from overflowing.
    exp_scores = np.exp(scores - np.max(scores))
    probabilities = exp_scores / exp_scores.sum()

    # `probabilities` has one row per action; flatten rows into a plain list.
    flat_probabilities = [p for row in probabilities for p in row]
    chosen_index = np.random.choice(list(range(len(probabilities))), p=flat_probabilities)
    return available_actions[chosen_index]
# Adapt predict_func through evoframe's get_agent_wrapper_func; the result is
# what gets registered on RewardBuilderGame via with_agent_wrapper_func.
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and context function
# Tris is a two-player game, so the reward is computed through a competitive
# tournament (mode: TournamentMode.VS_BESTS_IF_INCREASE).
keep_only = 100
reward_func, get_context_func = (
    RewardBuilderGame()
    .with_game_creation_func(game_creation_func)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_competitive_tournament(TournamentMode.VS_BESTS_IF_INCREASE)
    .with_keep_only(keep_only)
    .get()
)

# Update population function
# One "es_n_rewards_gradient" operator is registered for every
# (keep percentage, sigma) pair. Per the original author's note the numeric
# arguments of add_operator are: learning rate, sigma, keep percentage.
#
# NOTE(review): the original chain used sigma 0.06 in the third slot of the
# keep-percentage-0.5 group, while the keep-percentage-1.0 group used 0.006
# in the same slot. That broke both the mirrored grid and its ascending order
# and looks like a typo; both groups now share the same sigma grid. Restore
# 0.06 explicitly if it was intentional.
#
# An "es_1_copy" operator (argument 0.1) was previously tried and is disabled.
es_learning_rate = 0.1
es_sigmas = (0.001, 0.003, 0.006, 0.01)
es_keep_percs = (1., 0.5)

pop_update_builder = PopulationUpdateBuilderStatic()
for keep_perc in es_keep_percs:
    for sigma in es_sigmas:
        # Re-bind on every step so this works whether add_operator returns
        # the same builder or a new one.
        pop_update_builder = pop_update_builder.add_operator(
            "es_n_rewards_gradient", es_learning_rate, sigma, keep_perc
        )

get_new_pop_func = (
    pop_update_builder
    .add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .get()
)

# Evolution function
# Wire the model factory, reward function, population-update function and
# context function into a single callable that runs the experiment.
evolution_func = (
    EvolutionBuilder()
    .with_get_model_func(get_model_func)
    .with_reward_func(reward_func)
    .with_get_new_pop_func(get_new_pop_func)
    .with_get_context_func(get_context_func)
    .get()
)

# Experiment settings: population size and number of epochs are passed
# positionally to evolution_func below.
pop_size = 100
num_epochs = 100
# NOTE(review): the name mentions "mutation", but only gradient operators are
# registered on the population-update builder in this cell — confirm the name.
experiment_name = "tris_gradient_and_mutation"
# Called before the run with the same experiment name; presumably clears
# results left by a previous run — TODO confirm against evoframe.utility.
clean_experiment_directory(experiment_name)
# Launch the evolution with 8 threads; the run prints one
# "Epoch N, best reward is X" line per epoch.
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

Epoch 1, best reward is 2
Epoch 2, best reward is 4
Epoch 3, best reward is 6
Epoch 4, best reward is 4
Epoch 5, best reward is 8
Epoch 6, best reward is 5
Epoch 7, best reward is 8
Epoch 8, best reward is 7
Epoch 9, best reward is 6
Epoch 10, best reward is 8
Epoch 11, best reward is 7
Epoch 12, best reward is 10
Epoch 13, best reward is 10
Epoch 14, best reward is 8
Epoch 15, best reward is 7
Epoch 16, best reward is 7
Epoch 17, best reward is 6
Epoch 18, best reward is 8
Epoch 19, best reward is 10
Epoch 20, best reward is 10
Epoch 21, best reward is 9
Epoch 22, best reward is 11
Epoch 23, best reward is 8
Epoch 24, best reward is 9
Epoch 25, best reward is 12
Epoch 26, best reward is 8
Epoch 27, best reward is 9
Epoch 28, best reward is 11
Epoch 29, best reward is 10
Epoch 30, best reward is 13
Epoch 31, best reward is 13
Epoch 32, best reward is 9
Epoch 33, best reward is 10
Epoch 34, best reward is 11
Epoch 35, best reward is 10
Epoch 36, best reward is 10
Epoch 37, best reward i