In [None]:
from evoframe.reward_builders import RewardBuilderGame
from evoframe.population_update_builders import PopulationUpdateBuilderStatic
from evoframe.selector_function import SelectorFunctionFactory
from evoframe import EvolutionBuilder
from evoframe.models import FeedForwardNetwork
from evoframe.models import ActivationFunctions
from evoframe.games import Game
from evoframe import get_agent_wrapper_func

import numpy as np
import pandas as pd

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from evoframe.experiment_results import *
from evoframe.utility import clean_experiment_directory

%matplotlib inline

In [None]:
from evoframe.reward_builders.reward_builder_game import TournamentMode
from evoframe.games import Tris

In [None]:
# Game: every call hands back a freshly constructed Tris instance.
def game_creation_func(context):
    """Return a new Tris game; the `context` argument is accepted but not used."""
    return Tris()


# Model: layer widths of the feed-forward network. The 27 inputs match the
# 3-values-per-cell encoding built in predict_func; the last layer has 9 outputs.
layer_sizes = [27, 18, 12, 9]


def get_model_func():
    """Build a FeedForwardNetwork over `layer_sizes`, sigmoid for both activation arguments, no bias."""
    first_activation = ActivationFunctions.get_sigmoid()
    second_activation = ActivationFunctions.get_sigmoid()
    return FeedForwardNetwork(layer_sizes, first_activation, second_activation, with_bias=False)

# Game-Model interface
def predict_func(model, game):
    """Encode the game's board for the network and return the model's prediction.

    The board is flattened and every cell is one-hot encoded over three values
    (1 -> [1,0,0], 0 -> [0,1,0], anything else -> [0,0,1]), so the network
    receives three numbers per cell in one flat array. That array is passed to
    ``model.predict`` and element 0 of the result is returned.
    """
    encoded = []
    for cell in game.board.flatten():
        if cell == 1:
            encoded.extend((1, 0, 0))
        elif cell == 0:
            encoded.extend((0, 1, 0))
        else:
            encoded.extend((0, 0, 1))
    network_input = np.array(encoded)
    # Only the first element of predict's result is kept
    # (presumably predict returns a batch-shaped output - confirm against the model class).
    return model.predict(network_input)[0]
# Wrapper built from predict_func; it is applied to models before they play
# (the analysis cells below apply it to the loaded best model).
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and context function.
# Tris is a two-player game, so the reward is computed with a competitive
# tournament (mode VS_BESTS_RANDOM) rather than against a fixed opponent.
keep_only = 40
reward_func, get_context_func = (
    RewardBuilderGame()
    .with_game_creation_func(game_creation_func)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_competitive_tournament(TournamentMode.VS_BESTS_RANDOM)
    .with_keep_only(keep_only)
    .with_weight_normalization(2, 2)
    .get()
)


# Update population function.
# Each row holds the positional arguments of one add_operator call; rows are
# applied top to bottom, i.e. in the same order as the calls they replace.
# NOTE(review): the three "es_n_rewards_gradient" rows pass a single number that
# varies (0.01 / 0.05 / 0.1), while every other row starts with 0.1 - confirm
# against add_operator's signature that this is intended.
operator_specs = [
    ("es_1_copy", 0.1),
    ("es_1_mutation", 0.1, 0.1, 0.1),
    ("es_1_mutation", 0.1, 0.1, 0.5),
    ("es_1_mutation", 0.1, 0.3, 0.3),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.01, 0.1, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.01, 0.2, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.01, 0.3, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.05, 0.1, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.05, 0.2, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.05, 0.3, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.1, 0.1, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.1, 0.2, 1),
    ("es_n_rewards_gradient_and_mutation", 0.1, 0.1, 0.3, 1),
    ("es_n_rewards_gradient", 0.01),
    ("es_n_rewards_gradient", 0.05),
    ("es_n_rewards_gradient", 0.1),
]

pop_update_builder = PopulationUpdateBuilderStatic()
for operator_spec in operator_specs:
    # add_operator is chainable, so keep whatever it returns as the builder.
    pop_update_builder = pop_update_builder.add_operator(*operator_spec)

get_new_pop_func = (
    pop_update_builder
    .add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .get()
)

# Experiment settings used by the run below (and by the analysis cells).
experiment_name = "tris"
pop_size = 100
num_epochs = 10

# Evolution function: ties together the model factory, the reward function,
# the population-update function and the context function built above.
evolution_func = (
    EvolutionBuilder()
    .with_get_model_func(get_model_func)
    .with_reward_func(reward_func)
    .with_get_new_pop_func(get_new_pop_func)
    .with_get_context_func(get_context_func)
    .get()
)

# clean_experiment_directory runs first (presumably clearing earlier results
# stored under this experiment name - confirm), then the evolution is started.
clean_experiment_directory(experiment_name)
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

In [None]:
# Same settings as in the training cell above, repeated so the cells below have
# them defined even if that cell was not run (presumably to analyse a finished
# experiment without retraining). Keep the two copies in sync.
experiment_name = "tris"
num_epochs = 10
pop_size = 100

In [None]:
def get_random_input_func():
    """Return a random, one-hot encoded board as a flat array of 27 values.

    Nine cells are drawn one after another with ``np.random.choice`` from
    {-1, 0, 1}; each is encoded as 1 -> [1,0,0], 0 -> [0,1,0], -1 -> [0,0,1],
    which is the same encoding predict_func applies to a real board.
    """
    encoded = []
    for _ in range(9):
        cell = np.random.choice([-1, 0, 1])
        if cell == 1:
            encoded.extend((1, 0, 0))
        elif cell == 0:
            encoded.extend((0, 1, 0))
        else:
            encoded.extend((0, 0, 1))
    return np.array(encoded)

In [None]:
# Hand the random-input generator to plot_behavioural_variances_to_input for
# this experiment; the call's return value is shown as the cell output.
plot_behavioural_variances_to_input(experiment_name, get_random_input_func)

In [None]:
import plotly.express as px

def show_predictions(model, inp):
    """Plot the values an agent predicts for a given board as a 3x3 heatmap.

    Args:
        model: object whose ``predict(game)`` returns 9 values
            (in this notebook, a model wrapped by ``agent_wrapper_func``).
        inp: board to evaluate; it is assigned directly to ``Tris().board``.

    Returns:
        The figure created by ``plotly.express.density_heatmap``.
    """
    game = Tris()
    game.board = inp
    out = np.array(model.predict(game)).reshape((3, 3))
    n_rows, n_cols = out.shape

    # One record per output cell. Coordinates are 1-based so that, with the
    # axis ranges below, every bin is centred on an integer tick.
    records = [
        (row + 1, col + 1, out[row, col])
        for row in range(n_rows)
        for col in range(n_cols)
    ]
    # The axes are positions in the reshaped 3x3 output grid, not neurons, so
    # they are labelled "row"/"col" instead of "neuron_input"/"neuron_output".
    df = pd.DataFrame(data=records, columns=["row", "col", "value"])

    # Each bin receives exactly one record, so histfunc="sum" shows the raw value.
    return px.density_heatmap(
        df, x="col", y="row", z="value",
        histfunc="sum", color_continuous_scale="RdYlGn", range_color=(-2, 2),
        nbinsx=n_cols, nbinsy=n_rows,
        range_x=(0.5, n_cols + 0.5), range_y=(0.5, n_rows + 0.5),
    )

In [None]:
# Game-Model interface
def predict_func(model, game):
    """Encode the game's board for the network and return the model's prediction.

    This repeats the definition from the training cell (presumably so the
    analysis cells work without re-running training); keep both in sync.

    The board is flattened and every cell is one-hot encoded over three values
    (1 -> [1,0,0], 0 -> [0,1,0], anything else -> [0,0,1]), so the network
    receives three numbers per cell in one flat array. That array is passed to
    ``model.predict`` and element 0 of the result is returned.
    """
    encoded = []
    for cell in game.board.flatten():
        if cell == 1:
            encoded.extend((1, 0, 0))
        elif cell == 0:
            encoded.extend((0, 1, 0))
        else:
            encoded.extend((0, 0, 1))
    network_input = np.array(encoded)
    # Only the first element of predict's result is kept
    # (presumably predict returns a batch-shaped output - confirm against the model class).
    return model.predict(network_input)[0]
# Rebuild the wrapper from the predict_func defined just above, so that
# agent_wrapper_func is available to the cells below.
agent_wrapper_func = get_agent_wrapper_func(predict_func)

In [None]:
# Load a stored model for the last epoch (the "best" one, going by the helper's
# name) and wrap it so it can be queried with a game object.
model = agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size))

# Draw a random one-hot encoded input and print the raw 27-value vector.
inp = get_random_input_func()
print(inp)

# Decode every one-hot triple back to a cell value (1, 0 or -1), then regroup
# the nine values into three rows of three.
cell_values = [
    1 if triple == [1, 0, 0] else 0 if triple == [0, 1, 0] else -1
    for triple in inp.reshape(9, 3).tolist()
]
inp_repr = [cell_values[start:start + 3] for start in range(0, len(cell_values), 3)]

# Print the rows from last to first, each value followed by a single space.
# The heatmap's y axis starts at row 1, so this order lines the text up with it
# (assuming plotly's default upward y axis).
for board_row in reversed(inp_repr):
    print(*board_row, end=" \n")

show_predictions(model, np.array(inp_repr))

In [None]:
# Call plot_rewards for this experiment; its return value is the cell output.
plot_rewards(experiment_name)

In [None]:
# Interactive view of show_best_fnn_weights: the slider chooses the epoch
# (1 .. num_epochs, starting at 1); the experiment name and with_bias=False
# are held fixed and therefore get no widget.
epoch_slider = widgets.IntSlider(value=1, min=1, max=num_epochs, step=1)
interact(
    show_best_fnn_weights,
    experiment_name=fixed(experiment_name),
    epoch=epoch_slider,
    with_bias=fixed(False),
)

In [None]:
# Call plot_behavioural_differences with the random-input generator, using the
# "first_best" mode (the available modes are defined by the helper, not visible here).
plot_behavioural_differences(experiment_name, get_random_input_func, mode="first_best")

In [None]:
# Keep the result of plot_params_statistics in `figs`; the following cells
# display its elements 0..3 one at a time. with_bias=False matches the
# bias-free network built by get_model_func.
figs = plot_params_statistics(experiment_name, with_bias=False)

In [None]:
# Display element 0 of the plot_params_statistics result.
figs[0]

In [None]:
# Display element 1 of the plot_params_statistics result.
figs[1]

In [None]:
# Display element 2 of the plot_params_statistics result.
figs[2]

In [None]:
# Display element 3 of the plot_params_statistics result.
figs[3]

In [None]:
# Display the first element of what plot_tournament returns. The Tris class
# itself (not an instance) is passed, together with the agent wrapper.
# NOTE(review): the meaning of the positional 5 is not visible here - consider
# passing it by keyword once confirmed against plot_tournament's signature.
plot_tournament(experiment_name, agent_wrapper_func, Tris, 5)[0]

In [None]:
# Call plot_operators_statistics_means for this experiment; its return value is the cell output.
plot_operators_statistics_means(experiment_name)

In [None]:
# Call plot_operators_statistics_max for this experiment; its return value is the cell output.
plot_operators_statistics_max(experiment_name)

In [None]:
class AgentHuman:
    """Agent whose moves are typed by a person on standard input.

    It exposes the same ``predict(game)`` method as the wrapped model agents,
    so it can be passed to ``Tris.play`` in place of one of them.
    """

    # Length of the prediction vector; valid move indices are 0 .. NUM_CELLS - 1.
    NUM_CELLS = 9

    def predict(self, game):
        """Ask for a move until a valid cell index is entered.

        Non-numeric or out-of-range input no longer aborts the game or yields
        an all-zero prediction: an error line is printed and the prompt is
        shown again. Whether the chosen cell is free is not checked here.

        Args:
            game: the current game; not inspected by this agent.

        Returns:
            A list of NUM_CELLS ints with 1 at the chosen index and 0 elsewhere.
        """
        last_index = self.NUM_CELLS - 1
        while True:
            raw = input("Select move: ")
            try:
                move = int(raw)
            except ValueError:
                print("Invalid move {!r}: enter an integer from 0 to {}.".format(raw, last_index))
                continue
            if 0 <= move <= last_index:
                break
            print("Invalid move {}: enter an integer from 0 to {}.".format(move, last_index))
        return [1 if i == move else 0 for i in range(self.NUM_CELLS)]

In [None]:
# Interactive match: the human agent is passed first, then a stored model for
# the last epoch (the "best" one, going by the helper's name) wrapped as an agent.
game = Tris()
human_player = AgentHuman()
trained_player = agent_wrapper_func(
    pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size)
)
game.play(human_player, trained_player, interactive=True)