In [1]:
from evoframe.reward_builders import RewardBuilderGame
from evoframe.population_update_builders import PopulationUpdateBuilderStatic
from evoframe.selector_function import SelectorFunctionFactory
from evoframe import EvolutionBuilder
from evoframe.models import FeedForwardNetwork
from evoframe.models import ActivationFunctions
from evoframe.games import Game
from evoframe import get_agent_wrapper_func

import numpy as np
import pandas as pd

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

from evoframe.experiment_results import *
from evoframe.utility import clean_experiment_directory

%matplotlib inline

In [2]:
import imageio

def to_gif(experiment_name, num_epochs, duration=0.5):
    for i in range(1, num_epochs+1):
        print(i)
        f = show_best_fnn_weights(experiment_name, i)
        b = f.to_image(format="png")
        with open("images/image{}.png".format(i), "wb") as fi:
            fi.write(b)
    images = []
    filenames = ["images/image{}.png".format(i) for i in range(1, num_epochs+1)]
    for i,filename in enumerate(filenames):
        print(i)
        images.append(imageio.imread(filename))
    imageio.mimsave('nn.gif', images, duration=duration)

# Guess the point

In [None]:
from evoframe.games import GuessPoint

In [None]:
# Game
game_creation_func = lambda context: GuessPoint(np.array([0.2,0.8,0.5]), np.array([0.4, 0.5, 10]))

# Game-Model interface
predict_func = lambda model, game: model.predict(game.inputs)
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and update env function
reward_func, get_context_func = RewardBuilderGame() \
.with_game_creation_func(game_creation_func) \
.with_agent_wrapper_func(agent_wrapper_func) \
.get()

# Model
layer_sizes = [3, 5, 3]
get_model_func = lambda: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_relu(), ActivationFunctions.get_id())

# Update population function
get_new_pop_func = PopulationUpdateBuilderStatic() \
.add_operator("es_1_mutation", 1, 0.3, 0.3) \
.add_operator("es_2_crossover", 0.01, 0.8) \
.add_operator("es_1_copy", 0.01) \
.add_operator("es_n_rewards_gradient", 0.01) \
.add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3)) \
.get()

# Evolution function
evolution_func = EvolutionBuilder() \
    .with_get_model_func(get_model_func) \
    .with_reward_func(reward_func) \
    .with_get_new_pop_func(get_new_pop_func) \
    .with_get_context_func(get_context_func) \
    .get()

experiment_name = "guesspoint"
pop_size = 200
num_epochs = 10
clean_experiment_directory(experiment_name)
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

In [None]:
plot_rewards(experiment_name)

In [None]:
interact(show_best_fnn_weights,
         experiment_name=fixed(experiment_name),
         epoch=widgets.IntSlider(min=1, max=num_epochs, step=1, value=1))

In [None]:
inp = np.array([0.2,0.8,0.5])

def get_random_input_func():
    global inp
    return inp

In [None]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="first_best", iterations=1)

In [None]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="last_best", iterations=1)

In [None]:
plot_params_similarity(experiment_name, only_best=False)

In [None]:
plot_params_similarity(experiment_name, only_best=True)

In [None]:
plot_params_statistics(experiment_name)

# Guess the function

In [None]:
from evoframe.games import GuessFunction

In [None]:
# Game
game_func = lambda i: np.array([2*i[0]-3*i[1]+4, i[1]-8*i[2]-5])
input_dim = 3
input_domains = [(-1,1),(-1,1),(3,7)]
sample_every = [0.1, 0.1, 0.3]
game_creation_func = lambda context: GuessFunction(game_func, input_dim, input_domains, sample_every)

# Model
layer_sizes = [3, 5, 2]
get_model_func = lambda: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_arctan(), ActivationFunctions.get_id())

# Game-Model interface
predict_func = lambda model, inputs: model.predict(inputs)
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and update env function
reward_func, get_context_func = RewardBuilderGame() \
.with_game_creation_func(game_creation_func) \
.with_agent_wrapper_func(agent_wrapper_func) \
.get()

# Update population function
get_new_pop_func = PopulationUpdateBuilderStatic() \
.add_operator("es_1_mutation", 0.2, 0.1, 0.1) \
.add_operator("es_1_mutation", 0.2, 0.1, 0.5) \
.add_operator("es_1_mutation", 0.2, 0.3, 0.3) \
.add_operator("es_1_mutation", 0.2, 0.5, 0.5) \
.add_operator("es_1_mutation", 0.2, 0.5, 0.1) \
.add_operator("es_2_crossover", 0.1, 0.8) \
.add_operator("es_1_copy", 0.1) \
.add_operator("es_n_rewards_gradient", 0.1) \
.add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3)) \
.get()

# Evolution function
evolution_func = EvolutionBuilder() \
    .with_get_model_func(get_model_func) \
    .with_reward_func(reward_func) \
    .with_get_new_pop_func(get_new_pop_func) \
    .with_get_context_func(get_context_func) \
    .get()

pop_size = 40
num_epochs = 100
experiment_name = "guessfunction"
clean_experiment_directory(experiment_name)
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

In [None]:
plot_rewards(experiment_name)

In [None]:
test_array = np.array([0.5,0.5,5])
pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size).predict(test_array), game_func(test_array)

In [None]:
interact(show_best_fnn_weights,
         experiment_name=fixed(experiment_name),
         epoch=widgets.IntSlider(min=1, max=num_epochs, step=1, value=1))

In [None]:
plot_params_statistics(experiment_name)

In [None]:
input_domains = [(-1,1),(-1,1),(3,7)]
sample_every = [0.1, 0.1, 0.3]

def get_random_input_func():
    global input_domains
    global sample_every
    inp = []
    for i,input_domain in enumerate(input_domains):
        point_i = np.random.randint(low=0, high=int((input_domain[1] - input_domain[0])/sample_every[i]))
        point = input_domain[0] + sample_every[i] * point_i
        inp.append(point)
    inp = np.array(inp)
    return inp

In [None]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="first_best")

In [None]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="last_best")

In [None]:
plot_behavioural_variances_to_input(experiment_name, get_random_input_func)

In [None]:
plot_params_similarity(experiment_name, only_best=False, iterations=600)

In [None]:
plot_params_similarity(experiment_name, only_best=True, iterations=600)

# Play Tris, 9 inputs

In [None]:
from evoframe.reward_builders.reward_builder_game import TournamentMode
from evoframe.games import Tris

In [None]:
# Game
game_creation_func = lambda context: Tris()

# Model
layer_sizes = [9, 18, 9]
get_model_func = lambda: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_sigmoid(), ActivationFunctions.get_sigmoid())

# Game-Model interface
def predict_func(model, game):
    # In Tris, 'inputs' is a 3x3 np.array, thus we must flatten it to a 9x1 np.array
    # output is a 9x1 vector, where the cell with highest value corresponding
    # to a valid move will be the chosen move
    return model.predict(game.board.flatten())[0]
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and update env function
# Since Tris is a two-players game, we can compute the reward with a tournament against the current generation
keep_only = 40
reward_func, get_context_func = RewardBuilderGame() \
.with_game_creation_func(game_creation_func) \
.with_agent_wrapper_func(agent_wrapper_func) \
.with_competitive_tournament(TournamentMode.VS_CURRENT_POP) \
.with_keep_only(keep_only) \
.with_gradient_operator_reward(0.5) \
.with_weight_normalization(10, 0.1) \
.get()


# Update population function
get_new_pop_func = PopulationUpdateBuilderStatic() \
.add_operator("es_1_mutation", 0.2, 0.1, 0.1) \
.add_operator("es_1_mutation", 0.2, 0.1, 0.5) \
.add_operator("es_1_mutation", 0.2, 0.3, 0.3) \
.add_operator("es_1_mutation", 0.2, 0.5, 0.5) \
.add_operator("es_1_mutation", 0.2, 0.5, 0.1) \
.add_operator("es_2_crossover", 0.1, 0.8) \
.add_operator("es_1_copy", 0.1) \
.add_operator("es_n_rewards_gradient", 0.1, 0.05) \
.add_operator("es_n_rewards_gradient", 0.1, 0.1) \
.add_operator("es_n_rewards_gradient", 0.1, 0.3) \
.add_operator("es_n_rewards_gradient", 0.1, 0.5) \
.add_operator("es_n_rewards_gradient", 0.1, 1.0) \
.add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3)) \
.get()

# Evolution function
evolution_func = EvolutionBuilder() \
    .with_get_model_func(get_model_func) \
    .with_reward_func(reward_func) \
    .with_get_new_pop_func(get_new_pop_func) \
    .with_get_context_func(get_context_func) \
    .get()

pop_size = 20
num_epochs = 10
experiment_name = "tris_vs_cur_pop_with_MOO_gradient_weight_reg"
clean_experiment_directory(experiment_name)
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

In [None]:
plot_rewards(experiment_name, epochs=list(range(1, 11)))

In [None]:
interact(show_best_fnn_weights,
         experiment_name=fixed(experiment_name),
         epoch=widgets.IntSlider(min=1, max=num_epochs, step=1, value=1))

In [None]:
class AgentHuman:
    def __init__(self):
        pass
    
    def predict(self, board):
        move = int(input("Select move: "))
        prediction = [1 if i == move else 0 for i in range(9)]
        return prediction

In [None]:
# Game-Model interface
def predict_func(model, inputs):
    # In Tris, 'inputs' is a 3x3 np.array, thus we must flatten it to a 9x1 np.array
    # output is a 9x1 vector, where the cell with highest value corresponding
    # to a valid move will be the chosen move
    return model.predict(inputs.flatten())[0]
agent_wrapper_func = get_agent_wrapper_func(predict_func)

In [None]:
game = Tris()
game.play(AgentHuman(), agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size)), interactive=True)

In [None]:
import plotly.express as px

def show_predictions(model, inp):
    out = agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size)).predict(inp)
    out = out.reshape((3,3))
    shape = out.shape
    data = [(row+1, col+1, out[row][col]) for row in range(shape[0]) for col in range(shape[1])]
    columns = ["neuron_input", "neuron_output", "value"]
    df = pd.DataFrame(data=data, columns=columns)
    return px.density_heatmap(df, x="neuron_output", y="neuron_input", z="value",
                                         histfunc="sum", color_continuous_scale="RdYlGn", range_color=(-2,2),
                                         nbinsx=shape[1], nbinsy=shape[0],
                                         range_x=(0.5, shape[1]+0.5), range_y=(0.5, shape[0]+0.5))

In [None]:
def get_random_input_func():
    return np.array([[np.random.choice([-1,0,1]) for i in range(3)] for j in range(3)])

In [None]:
model = agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size))
inp = get_random_input_func()
print(inp)
show_predictions(model, inp)

In [None]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="first_best")

In [None]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="last_best")

In [None]:
plot_params_similarity(experiment_name, only_best=False, iterations=1000)

In [None]:
plot_params_similarity(experiment_name, only_best=True, iterations=1000)

In [None]:
plot_behavioural_variances_to_input(experiment_name, get_random_input_func)

In [None]:
figs = plot_params_statistics(experiment_name)

In [None]:
figs[0]

In [None]:
figs[1]

In [None]:
figs[2]

In [None]:
figs[3]

# Tris with state-value function

In [3]:
from evoframe.reward_builders.reward_builder_game import TournamentMode
from evoframe.games import Tris

In [None]:
# Game
game_creation_func = lambda context: Tris()

# Model
layer_sizes = [9, 18, 1]
get_model_func = lambda: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_sigmoid(), ActivationFunctions.get_id())

# Game-Model interface
def predict_func(model, game):
    available_actions = game.get_available_actions()
    vs = []
    for a in available_actions:
        next_state = game.get_next_state(a)
        vs.append(model.predict(next_state.flatten())[0])
    i_max = np.array(vs).argmax()
    best_action = available_actions[i_max]
    return best_action
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and update env function
# Since Tris is a two-players game, we can compute the reward with a tournament against the current generation
keep_only = 40
reward_func, get_context_func = RewardBuilderGame() \
.with_game_creation_func(game_creation_func) \
.with_agent_wrapper_func(agent_wrapper_func) \
.with_competitive_tournament(TournamentMode.VS_PEAKS) \
.with_keep_only(keep_only) \
.get()
#.with_gradient_operator_reward(0.5) \
#.with_weight_normalization(10, 0.1) \

# Update population function
get_new_pop_func = PopulationUpdateBuilderStatic() \
.add_operator("es_1_mutation", 0.2, 0.1, 0.1) \
.add_operator("es_1_mutation", 0.2, 0.1, 0.5) \
.add_operator("es_1_mutation", 0.2, 0.3, 0.3) \
.add_operator("es_1_mutation", 0.2, 0.5, 0.5) \
.add_operator("es_1_mutation", 0.2, 0.5, 0.1) \
.add_operator("es_2_crossover", 0.1, 0.8) \
.add_operator("es_1_copy", 0.1) \
.add_operator("es_n_rewards_gradient", 0.1, 0.05) \
.add_operator("es_n_rewards_gradient", 0.1, 0.1) \
.add_operator("es_n_rewards_gradient", 0.1, 0.3) \
.add_operator("es_n_rewards_gradient", 0.1, 0.5) \
.add_operator("es_n_rewards_gradient", 0.1, 1.0) \
.add_selector_func(SelectorFunctionFactory.get_geometric_selector_function(0.3)) \
.get()

# Evolution function
evolution_func = EvolutionBuilder() \
    .with_get_model_func(get_model_func) \
    .with_reward_func(reward_func) \
    .with_get_new_pop_func(get_new_pop_func) \
    .with_get_context_func(get_context_func) \
    .get()

pop_size = 200
num_epochs = 200
experiment_name = "tris_state_value_function"
clean_experiment_directory(experiment_name)
evolution_func(experiment_name, pop_size, num_epochs, num_threads=8)

In [22]:
pop_size = 200
num_epochs = 200
experiment_name = "tris_state_value_function"
def predict_func(model, game):
    available_actions = game.get_available_actions()
    vs = []
    for a in available_actions:
        next_state = game.get_next_state(a)
        vs.append(model.predict(next_state.flatten())[0])
    i_max = np.array(vs).argmax()
    best_action = available_actions[i_max]
    return best_action
agent_wrapper_func = get_agent_wrapper_func(predict_func)

In [18]:
plot_rewards(experiment_name)

In [6]:
interact(show_best_fnn_weights,
         experiment_name=fixed(experiment_name),
         epoch=widgets.IntSlider(min=1, max=num_epochs, step=1, value=1))

interactive(children=(IntSlider(value=1, description='epoch', max=200, min=1), Output()), _dom_classes=('widge…

<function evoframe.experiment_results.show_best_fnn_weights(experiment_name, epoch)>

In [23]:
class AgentHuman:
    def __init__(self):
        pass
    
    def predict(self, board):
        move = int(input("Select move: "))
        prediction = [1 if i == move else 0 for i in range(9)]
        return prediction

In [24]:
game = Tris()
game.play(AgentHuman(), agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size)), interactive=True)

0 0 0 
0 0 0 
0 0 0 
------------------------------
Select move: 0
1 0 0 
0 0 0 
0 0 0 
------------------------------
1 0 0 
0 0 0 
-1 0 0 
------------------------------
Select move: 1
1 1 0 
0 0 0 
-1 0 0 
------------------------------
1 1 0 
0 -1 0 
-1 0 0 
------------------------------
Select move: 3
1 1 0 
1 -1 0 
-1 0 0 
------------------------------
1 1 0 
1 -1 -1 
-1 0 0 
------------------------------
Select move: 0
1 1 1 
1 -1 -1 
-1 0 0 
------------------------------
1 1 1 
1 -1 -1 
-1 0 0 
------------------------------


(1, -1)

In [28]:
# import plotly.express as px

def show_predictions(model, inp):
    out = model.predict(inp)
    out = out.reshape((3,3))
    shape = out.shape
    data = [(row+1, col+1, out[row][col]) for row in range(shape[0]) for col in range(shape[1])]
    columns = ["neuron_input", "neuron_output", "value"]
    df = pd.DataFrame(data=data, columns=columns)
    return px.density_heatmap(df, x="neuron_output", y="neuron_input", z="value",
                                         histfunc="sum", color_continuous_scale="RdYlGn", range_color=(-2,2),
                                         nbinsx=shape[1], nbinsy=shape[0],
                                         range_x=(0.5, shape[1]+0.5), range_y=(0.5, shape[0]+0.5))

In [29]:
def get_random_input_func():
    return np.array([[np.random.choice([-1,0,1]) for i in range(3)] for j in range(3)])

In [30]:
model = agent_wrapper_func(pickle_load_best_model_of_epoch(experiment_name, num_epochs, pop_size))
inp = get_random_input_func()
print(inp)
show_predictions(model, inp)

[[ 0  1  1]
 [ 0 -1 -1]
 [-1  0  0]]


AttributeError: 'numpy.ndarray' object has no attribute 'get_available_actions'

In [31]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="first_best")

In [32]:
plot_behavioural_differences(experiment_name, get_random_input_func, mode="last_best")

In [None]:
plot_params_similarity(experiment_name, only_best=False, iterations=1000)

In [33]:
plot_params_similarity(experiment_name, only_best=True, iterations=1000)

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 200 samples in 0.000s...
[t-SNE] Computed neighbors for 200 samples in 0.023s...
[t-SNE] Computed conditional probabilities for sample 200 / 200
[t-SNE] Mean sigma: 5.497232
[t-SNE] KL divergence after 250 iterations with early exaggeration: 48.965408
[t-SNE] KL divergence after 750 iterations: 0.046903


In [34]:
plot_behavioural_variances_to_input(experiment_name, get_random_input_func)

In [36]:
figs = plot_params_statistics(experiment_name)

In [37]:
figs[0]

In [38]:
figs[1]

In [39]:
figs[2]

In [40]:
figs[3]

# Monkeywars

In [None]:
import numpy as np
from evoframe.games import Game

class Tris(Game):
    """Player1 starts. Rewards of both players are returned."""
    PLAYER_1 = 1
    PLAYER_2 = -1
    EMPTY = 0
    DRAW = 0
    CONTINUE = 2
    PLAYERS = [PLAYER_1, PLAYER_2]

    def __init__(self):
        self.board = np.array([np.array([self.EMPTY for i in range(3)]) for j in range(3)])

    def check_win(self):
        # check rows
        board = self.board
        for row in board:
            for player in self.PLAYERS:
                if np.all(np.equal(row, np.full(3, player))):
                    return player

        # check cols
        board = self.board.transpose()
        for row in board:
            for player in self.PLAYERS:
                if np.all(np.equal(row, np.full(3, player))):
                    return player

        # check diagonals
        diags = []
        diags.append(np.array([board[i][i] for i in range(3)]))
        diags.append(np.array([board[i][3 - i - 1] for i in range(3)]))
        for row in diags:
            for player in self.PLAYERS:
                if np.all(np.equal(row, np.full(3, player))):
                    return player

        # check draw
        exist_empty = False
        for row in self.board:
            for cell in row:
                if cell == self.EMPTY:
                    exist_empty = True
        if not exist_empty:
            return self.DRAW

        return self.CONTINUE

    def extract_move(self, prediction):
        highest_value = -100000
        highest_value_index = -1
        for i,pred in enumerate(prediction):
            if self.board[i//3][i%3] == self.EMPTY and pred > highest_value:
                highest_value = pred
                highest_value_index = i
        return highest_value_index

    def do_move(self, move, player):
        self.board[move//3][move%3] = player

    def opposite_board(self):
        return np.array([np.array([self.PLAYER_2 if self.board[row][col] == self.PLAYER_1
                                   else self.PLAYER_1 if self.board[row][col] == self.PLAYER_2
                                   else self.EMPTY for col in range(3)]) for row in range(3)])

    def play(self, agent_1, agent_2, interactive=False):
        player_turn = self.PLAYER_1

        if interactive:
            self.print_board()

        result = self.check_win()
        while result == self.CONTINUE:
            if player_turn == self.PLAYER_1:
                prediction = agent_1.predict(self.board)
            else:
                prediction = agent_2.predict(self.opposite_board())

            move = self.extract_move(prediction)
            self.do_move(move, player_turn)

            if interactive:
                self.print_board()

            if player_turn == self.PLAYER_1:
                player_turn = self.PLAYER_2
            else:
                player_turn = self.PLAYER_1

            result = self.check_win()

        if interactive:
                self.print_board()

        opponent_result = self.PLAYER_2 if result == self.PLAYER_1 else self.PLAYER_1 if result == self.PLAYER_2 else self.DRAW
        return result, opponent_result

    def print_board(self):
        for row in self.board:
            for cell in row:
                print(cell, end=" ")
            print("")
        print("-"*30)
