In [3]:
from evoframe.reward_builders import RewardBuilderGame
from evoframe.population_update_builders import PopulationUpdateBuilderStatic
from evoframe.selector_function import SelectorFunctionFactory
from evoframe import EvolutionBuilder
from evoframe.models import FeedForwardNetwork
from evoframe.models import ActivationFunctions
from evoframe.games import Game
from evoframe import get_agent_wrapper_func

import numpy as np
import pandas as pd

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from evoframe.experiment_results import *
from evoframe.utility import clean_experiment_directory

%matplotlib inline

In [4]:
from evoframe.reward_builders.reward_builder_game import TournamentMode
from evoframe.games import Tris

In [5]:
# Game
def game_creation_func(context):
    """Create a new Tris game instance.

    Args:
        context: Accepted for interface compatibility; not used here.

    Returns:
        A freshly constructed ``Tris`` game.
    """
    return Tris()


# Model
# presumably 9 inputs correspond to a flattened 3x3 board and the single output
# is the state value used by predict_func — TODO confirm against FeedForwardNetwork.
layer_sizes = [9, 6, 1]


def get_model_func():
    """Build a new FeedForwardNetwork from ``layer_sizes`` with sigmoid activations.

    ``layer_sizes`` is looked up at call time, so changing the module-level
    list before calling affects the created network.

    Returns:
        A new ``FeedForwardNetwork`` constructed with ``layer_sizes`` and two
        sigmoid activation functions.
    """
    return FeedForwardNetwork(
        layer_sizes,
        ActivationFunctions.get_sigmoid(),
        ActivationFunctions.get_sigmoid(),
    )

# Game-Model interface
def predict_func(model, game):
    """Choose the available action whose successor state the model scores highest.

    Args:
        model: Object exposing ``predict(flat_state)``; the first element of
            its result is used as the score of a state.
        game: Object exposing ``get_available_actions()`` and
            ``get_next_state(action)``; the returned state must support
            ``flatten()``.

    Returns:
        The element of ``game.get_available_actions()`` with the highest score.
        On ties the earliest action wins (first index returned by argmax).
    """
    candidate_actions = game.get_available_actions()
    # Score every candidate by evaluating the state it would lead to.
    successor_values = np.array([
        model.predict(game.get_next_state(action).flatten())[0]
        for action in candidate_actions
    ])
    return candidate_actions[successor_values.argmax()]
# Wrap predict_func so it can be handed to the reward builder below.
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and context function
# Tris is a two-player game, so the reward is computed with a competitive tournament.
# NOTE(review): the tournament runs in TournamentMode.VS_PEAKS; confirm this matches
# the intended "play against the current generation" setup.
keep_only = 10
reward_func, get_context_func = (
    RewardBuilderGame()
    .with_game_creation_func(game_creation_func)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_competitive_tournament(TournamentMode.VS_PEAKS)
    .with_keep_only(keep_only)
    .get()
)
# Disabled builder option, kept for reference: .with_gradient_operator_reward(0.5)

# Population update function
# Each add_operator call registers an operator by name followed by its numeric settings;
# a geometric selector function (parameter 0.3) is attached before building.
# NOTE(review): the first numeric arguments add up to 1.25 (4 * 0.2 + 0.05 + 0.1 + 3 * 0.1).
# If they are meant as shares of the population, confirm the builder normalises them.
get_new_pop_func = (
    PopulationUpdateBuilderStatic()
    .add_operator("es_1_mutation", 0.2, 0.1, 0.1)
    .add_operator("es_1_mutation", 0.2, 0.3, 0.1)
    .add_operator("es_1_mutation", 0.2, 0.1, 0.3)
    .add_operator("es_1_mutation", 0.2, 0.05, 0.3)
    .add_operator("es_2_crossover", 0.05, 0.8)
    .add_operator("es_1_copy", 0.1)
    .add_operator("es_n_rewards_gradient", 0.1, 0.05)
    .add_operator("es_n_rewards_gradient", 0.1, 0.1)
    .add_operator("es_n_rewards_gradient", 0.1, 0.2)
    .add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .get()
)

# Evolution function: combines the model factory, reward function,
# population update function and context function defined above.
evolution_func = (
    EvolutionBuilder()
    .with_get_model_func(get_model_func)
    .with_reward_func(reward_func)
    .with_get_new_pop_func(get_new_pop_func)
    .with_get_context_func(get_context_func)
    .get()
)

# Run parameters; experiment_name is also the key used by the plotting cells.
pop_size = 100
num_epochs = 1000
experiment_name = "tris_state_value_function"

In [21]:
def get_random_input_func():
    """Return a random 3x3 integer array with every entry drawn from {-1, 0, 1}.

    Entries are drawn one at a time, row by row, with ``np.random.choice``,
    so the result depends on NumPy's global random state.
    """
    cell_values = [-1, 0, 1]
    board_rows = []
    for _ in range(3):
        row = []
        for _ in range(3):
            row.append(np.random.choice(cell_values))
        board_rows.append(row)
    return np.array(board_rows)

In [23]:
from evoframe.utility import *

In [63]:
# Epochs to include in the plots: 1 through 699.
# The range could instead be derived from the stored rewards:
#   list(range(1, len(pickle_load_rewards(experiment_name)) // pop_size))
# NOTE(review): the output saved with this cell is
# "UnpicklingError: pickle data was truncated"; presumably an experiment pickle
# file was incomplete when it was read — verify the files before re-running.
epochs = [*range(1, 700)]
plot_behavioural_variances_to_input(
    experiment_name,
    get_random_input_func,
    epochs=epochs,
)

UnpicklingError: pickle data was truncated

In [57]:
# Plot the rewards of the experiment for the selected epochs.
# Relies on `epochs` defined in an earlier cell; this cell's execution count (57)
# precedes that cell's (63), so run the notebook top to bottom on a fresh kernel.
# NOTE(review): the output saved with this cell is
# "UnpicklingError: pickle data was truncated" — verify the experiment files.
plot_rewards(experiment_name, epochs=epochs)

UnpicklingError: pickle data was truncated