In [1]:
from evoframe.reward_builders import RewardBuilderGame
from evoframe.population_update_builders import PopulationUpdateBuilderStatic
from evoframe.selector_function import SelectorFunctionFactory
from evoframe import PopulationManager
from evoframe.models import FeedForwardNetwork
from evoframe.models import ActivationFunctions
from evoframe.games import Game
from evoframe import get_agent_wrapper_func
import evoframe.func_with_context as fwc

import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def plot_rewards(env):
    """Draw a line plot of the first reward stored for each epoch.

    Args:
        env: mapping with a "rewards" entry; ``env["rewards"]`` maps each epoch
            to an indexable collection of rewards, of which element 0 is plotted.
            (presumably element 0 is the best reward of the epoch -- TODO confirm)
    """
    rewards_by_epoch = env["rewards"]
    # x axis: 0 .. number_of_epochs - 1, in the iteration order of the mapping
    epoch_axis = np.arange(len(rewards_by_epoch.keys()))
    first_rewards = [epoch_rewards[0] for epoch_rewards in rewards_by_epoch.values()]
    plt.plot(epoch_axis, first_rewards)

In [3]:
import collections

def recursively_default_dict():
    """Build a defaultdict whose missing keys are filled with further such dicts.

    Any chain of lookups (``d["a"]["b"]["c"]``) therefore succeeds and creates
    the intermediate levels on the fly.
    """
    nested_factory = recursively_default_dict
    return collections.defaultdict(nested_factory)

In [4]:
def plot_delta_rewards_by_operator(context, hist_range, save=False, filename="guessfunction.png"):
    """Plot, per epoch and per operator, rewards relative to the previous epoch's best.

    For every epoch after the first one, each individual's reward minus the
    maximum reward of the previous epoch is computed. The deltas are grouped by
    the operator recorded for the individual and drawn as one histogram per
    (epoch, operator) pair: one row per epoch, one column per operator.

    Args:
        context: mapping with an "epochs" entry. ``context["epochs"]`` maps an
            epoch key to a mapping holding parallel "rewards" and "operators"
            sequences (one entry per individual).
        hist_range: ``(low, high)`` pair forwarded as ``range`` to ``plt.hist``.
        save: when true the figure is written to ``filename``, otherwise it is shown.
        filename: output path used when ``save`` is true. Defaults to the
            previously hard-coded "guessfunction.png".

    Returns:
        None. Nothing is drawn when fewer than two epochs are available,
        because there is no previous epoch to compare against.
    """
    epochs = list(context["epochs"].items())
    if len(epochs) < 2:
        return

    # The first epoch only provides the baseline; every later epoch gets a row.
    plotted = epochs[1:]

    # Collect the operators actually used in the plotted epochs and sort them
    # so that the column order is the same on every run (set order is not).
    all_operators = sorted(
        {op for _, epoch in plotted for op in epoch["operators"]},
        key=str,
    )
    num_cols = len(all_operators)
    num_rows = len(plotted)

    plt.figure(figsize=(16, num_rows * 4))

    prev_best_reward = max(epochs[0][1]["rewards"])
    for row, (i_epoch, epoch) in enumerate(plotted):
        rewards = epoch["rewards"]

        deltas_by_operator = {op: [] for op in all_operators}
        for op, reward in zip(epoch["operators"], rewards):
            deltas_by_operator[op].append(reward - prev_best_reward)

        for col, op in enumerate(all_operators):
            # Subplot indices are 1-based and run row by row over num_cols columns.
            plt.subplot(num_rows, num_cols, row * num_cols + col + 1)
            plt.title("Epoch {} - Operator {}".format(i_epoch, op))
            plt.hist(deltas_by_operator[op], 50, range=hist_range)

        prev_best_reward = max(rewards)

    if save:
        plt.savefig(filename)
    else:
        plt.show()

# Guess the point

In [None]:
from evoframe.games import GuessPoint

In [None]:
# Context object handed to the factories, the builders and the population manager below
context = recursively_default_dict()

# Game: GuessPoint factory, wrapped by fwc.func_with_context together with the context
game_creation_func = fwc.func_with_context(
    lambda context: GuessPoint(np.array([0.2,0.8,0.5]), np.array([0.4, 0.5, 10])),
    context=context,
)

# Model: feed-forward network factory (sigmoid / identity activations), wrapped the same way
layer_sizes = [3, 5, 3]
get_model_func = fwc.func_with_context(
    lambda context: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_sigmoid(), ActivationFunctions.get_id()),
    context=context,
)

# Game-Model interface: the agent simply forwards its inputs to model.predict
predict_func = lambda model, inputs: model.predict(inputs)
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function, built from the game factory and the agent wrapper
reward_function = (
    RewardBuilderGame()
    .with_game_creation_function(game_creation_func)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_context(context)
    .get()
)

# Population update function: three operators and a geometric selector
mutation_perc = 0.6
crossover_perc = 0.3
copy_perc = 0.1
get_new_pop_f = (
    PopulationUpdateBuilderStatic()
    .add_operator("es_1_mutation", mutation_perc, 0.3, 0.3)
    .add_operator("es_2_crossover", crossover_perc, 0.8)
    .add_operator("es_1_copy", copy_perc)
    .add_selector_f(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .with_context(context)
    .get()
)

# Population manager
pop_size = 500
pm = PopulationManager(pop_size, get_model_func, reward_function, get_new_pop_f, context)

# Run the population manager and keep what run() returns
num_epochs = 10
last_pop = pm.run(num_epochs)

In [None]:
# Per epoch and per operator, histogram of each reward minus the previous epoch's
# best reward, with the histogram range limited to [-20, 5]; the figure is shown, not saved.
plot_delta_rewards_by_operator(context, hist_range=(-20, 5))

# Guess the function

In [5]:
from evoframe.games import GuessFunction

In [6]:
# Context object handed to the factories, the builders and the population manager below
context = recursively_default_dict()

# Game: GuessFunction factory built around the target function `game_func`
game_func = lambda i: np.array([2*i[0]-3*i[1]+4, i[1]-8*i[2]-5])
input_dim = 3
input_domains = [(-1,1),(-1,1),(3,7)]
sample_every = [0.1, 0.1, 0.3]
game_creation_function = fwc.func_with_context(
    lambda context: GuessFunction(game_func, input_dim, input_domains, sample_every),
    context=context,
)

# Model: feed-forward network factory (arctan / identity activations)
layer_sizes = [3, 5, 2]
get_model_func = fwc.func_with_context(
    lambda context: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_arctan(), ActivationFunctions.get_id()),
    context=context,
)

# Game-Model interface: the agent simply forwards its inputs to model.predict
predict_func = lambda model, inputs: model.predict(inputs)
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function, built from the game factory and the agent wrapper
reward_function = (
    RewardBuilderGame()
    .with_game_creation_function(game_creation_function)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_context(context)
    .get()
)

# Population update function: three operators and a geometric selector
get_new_pop_f = (
    PopulationUpdateBuilderStatic()
    .add_operator("es_1_mutation", 0.6, 0.3, 0.5)
    .add_operator("es_2_crossover", 0.3, 0.1)
    .add_operator("es_1_copy", 0.1)
    .add_selector_f(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .with_context(context)
    .get()
)

# Population manager
pop_size = 50
pm = PopulationManager(pop_size, get_model_func, reward_function, get_new_pop_f, context)

# Run the population manager (the return value is left as the cell output)
num_epochs = 30
pm.run(num_epochs)

Epoch 1, best reward is -8108260.599823801
Epoch 2, best reward is -7088138.462857202


KeyboardInterrupt: 

In [None]:
# Same per-epoch / per-operator delta histograms as for the point game, with a much wider
# range; save=True makes the function write the figure to "guessfunction.png" instead of showing it.
plot_delta_rewards_by_operator(context, hist_range=(-5000000, 1000000), save=True)

In [None]:
# Compare, on one sample input, the prediction of model 0 stored for epoch `num_epochs`
# with the value of the target function `game_func`.
# NOTE(review): `context` is a recursively-defaulting dict, so if no entry was recorded for
# epoch `num_epochs` (the saved output of the training cell ends in a KeyboardInterrupt) this
# lookup silently creates empty nested dicts and `.predict` fails with AttributeError.
# presumably models[0] is the best individual of the epoch -- TODO confirm
test_array = np.array([0.5,0.5,5])
context["epochs"][num_epochs]["models"][0].predict(test_array), game_func(test_array)

# Play Tris

In [7]:
from evoframe.reward_builders.reward_builder_game import TournamentMode

In [8]:
class Tris(Game):
    """Tic-tac-toe ("Tris") on a 3x3 NumPy board, played by two agents.

    Cells hold ``PLAYER_1``, ``PLAYER_2`` or ``EMPTY``. Moves are integers in
    0..8 addressing the board row by row (``row = move // 3``,
    ``col = move % 3``).
    """

    PLAYER_1 = 1
    PLAYER_2 = -1
    EMPTY = 0
    # Game outcome codes returned by check_win() besides the two player values.
    # DRAW shares the numeric value of EMPTY, but the two are never compared.
    DRAW = 0
    CONTINUE = 2
    PLAYERS = [PLAYER_1, PLAYER_2]

    def __init__(self):
        """Start from an empty 3x3 board."""
        self.board = np.full((3, 3), Tris.EMPTY)

    def check_win(self):
        """Return the state of the game.

        Returns:
            ``Tris.PLAYER_1`` or ``Tris.PLAYER_2`` if that player owns a full
            row, column or diagonal; ``Tris.DRAW`` if the board is full with no
            winner; ``Tris.CONTINUE`` otherwise.
        """
        board = self.board

        # Rows first, then columns, then the two diagonals.
        lines = list(board) + list(board.T)
        lines.append(board.diagonal())
        lines.append(np.fliplr(board).diagonal())

        for line in lines:
            # Class attributes are not visible as bare names inside methods,
            # so the constants must be qualified with the class name.
            for player in Tris.PLAYERS:
                if np.all(line == player):
                    return player

        if not np.any(board == Tris.EMPTY):
            return Tris.DRAW

        return Tris.CONTINUE

    def extract_move(self, prediction):
        """Pick the empty cell with the highest predicted score.

        Args:
            prediction: sequence of scores, one per cell (row-major order).

        Returns:
            Index of the best-scoring empty cell (the first one on ties), or
            -1 when no empty cell has a score that compares greater than
            negative infinity (full board, or e.g. NaN scores).
        """
        # Start from -inf rather than an arbitrary large negative number so
        # that any finite score, however low, can still be selected.
        highest_value = float("-inf")
        highest_value_index = -1
        for i, pred in enumerate(prediction):
            if self.board[i // 3][i % 3] == Tris.EMPTY and pred > highest_value:
                highest_value = pred
                highest_value_index = i
        return highest_value_index

    def do_move(self, move, player):
        """Write ``player`` into the cell addressed by ``move`` (0..8, row-major)."""
        self.board[move // 3][move % 3] = player

    def opposite_board(self):
        """Return a copy of the board with the two players' marks swapped.

        Lets the second agent see the position as if it were ``PLAYER_1``.
        """
        board = self.board
        swapped = np.full_like(board, Tris.EMPTY)
        swapped[board == Tris.PLAYER_1] = Tris.PLAYER_2
        swapped[board == Tris.PLAYER_2] = Tris.PLAYER_1
        return swapped

    def play(self, agent_1, agent_2, interactive=False):
        """Play a game between two agents until it is won or drawn.

        The starting player is chosen at random. ``agent_1`` plays as
        ``PLAYER_1`` and receives the board as is; ``agent_2`` plays as
        ``PLAYER_2`` and receives the board with the marks swapped.

        Args:
            agent_1: object with a ``predict(board)`` method returning one score per cell.
            agent_2: same interface as ``agent_1``.
            interactive: when true, print the board before the first move and after every move.

        Returns:
            ``Tris.PLAYER_1``, ``Tris.PLAYER_2`` or ``Tris.DRAW``.
        """
        player_turn = np.random.choice(Tris.PLAYERS)

        if interactive:
            self.print_board()

        result = self.check_win()
        while result == Tris.CONTINUE:
            if player_turn == Tris.PLAYER_1:
                prediction = agent_1.predict(self.board)
            else:
                prediction = agent_2.predict(self.opposite_board())

            move = self.extract_move(prediction)
            self.do_move(move, player_turn)

            # The board is printed after every move, so the final position is
            # already shown once the loop ends; it is not printed again.
            if interactive:
                self.print_board()

            if player_turn == Tris.PLAYER_1:
                player_turn = Tris.PLAYER_2
            else:
                player_turn = Tris.PLAYER_1

            result = self.check_win()

        return result

    def print_board(self):
        """Print the board, one row per line, followed by a separator line."""
        for row in self.board:
            for cell in row:
                print(cell, end=" ")
            print("")
        print("-"*30)

In [11]:
# Context object handed to the factories, the builders and the population manager below
context = recursively_default_dict()

# Game: Tris factory, wrapped by fwc.func_with_context together with the context
game_creation_function = fwc.func_with_context(
    lambda context: Tris(),
    context=context,
)

# Model: feed-forward network factory with 9 inputs and 9 outputs (sigmoid activations)
layer_sizes = [9, 5, 5, 9]
get_model_func = fwc.func_with_context(
    lambda context: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_sigmoid(), ActivationFunctions.get_sigmoid()),
    context=context,
)

# Game-Model interface
def predict_func(model, inputs):
    """Query `model` with the flattened Tris board and return element 0 of its output.

    In Tris, `inputs` is the 3x3 board array; it is flattened into a
    9-element vector before being passed to the model. The returned scores
    are used one per cell: the highest-scoring valid cell becomes the move.
    (presumably `model.predict` returns a batch and [0] selects its only row -- TODO confirm)
    """
    flat_board = inputs.flatten()
    model_output = model.predict(flat_board)
    return model_output[0]
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function
# Tris is a two-player game, so the reward is computed through a competitive tournament
# (mode VS_BEST_OF_EACH_GEN, with_keep_only(30))
reward_function = (
    RewardBuilderGame()
    .with_game_creation_function(game_creation_function)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_competitive_tournament(TournamentMode.VS_BEST_OF_EACH_GEN)
    .with_keep_only(30)
    .with_context(context)
    .get()
)

# Population update function: three operators and a geometric selector
get_new_pop_f = (
    PopulationUpdateBuilderStatic()
    .add_operator("es_1_mutation", 0.6, 0.3, 0.5)
    .add_operator("es_2_crossover", 0.3, 0.1)
    .add_operator("es_1_copy", 0.1)
    .add_selector_f(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .with_context(context)
    .get()
)

# Population manager
pop_size = 10
pm = PopulationManager(pop_size, get_model_func, reward_function, get_new_pop_f, context)

# Run the population manager (the return value is left as the cell output)
num_epochs = 100
pm.run(num_epochs)

AttributeError: module 'evoframe.func_with_context' has no attribute 'CONTEXT_KEY_INDIVIDUALS'

In [None]:
# Plot the first reward of every epoch stored in `env["rewards"]`.
# NOTE(review): no cell above this one assigns `env`; the only assignment visible in this
# notebook is `last_pop, env = pm.run(num_epochs)` in a later cell, so on a fresh kernel
# run top to bottom this call raises NameError.
plot_rewards(env)

In [None]:
# Game
# NOTE(review): unlike the other setup cells in this notebook, the factories here take no
# `context` argument and are not wrapped with fwc.func_with_context -- presumably this cell
# targets an earlier evoframe API; confirm before running it against the current one.
game_creation_function = lambda: Tris()

# Model
# 18 inputs: the predict_func defined below feeds two values per board cell (9 cells).
layer_sizes = [18, 9, 9]
get_model_func = lambda: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_sigmoid(), ActivationFunctions.get_sigmoid())

# Game-Model interface
def predict_func(model, inputs):
    """Query `model` with a two-values-per-cell encoding of the Tris board.

    `inputs` is the 3x3 board. Every cell is expanded, in row-major order, to
    [1, 0] for Tris.PLAYER_1, [0, 1] for Tris.PLAYER_2 and [0, 0] otherwise,
    giving an 18-element vector. Element 0 of the model's prediction is
    returned; its scores are used one per cell, the highest-scoring valid
    cell becoming the move.
    """
    def encode(cell):
        # Two indicator values per cell: (is player 1, is player 2)
        if cell == Tris.PLAYER_1:
            return [1, 0]
        if cell == Tris.PLAYER_2:
            return [0, 1]
        return [0, 0]

    features = [bit for row in inputs for cell in row for bit in encode(cell)]
    return model.predict(np.array(features))[0]
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function and update env function
# Tris is a two-player game, so the reward is computed through a competitive tournament
# (mode VS_BEST_OF_EACH_GEN, with_keep_only(30)).
# NOTE(review): here get() is unpacked into two values and no context is attached, unlike
# the other setup cells of this notebook -- confirm which evoframe API this cell expects.
reward_function, update_env_f = (
    RewardBuilderGame()
    .with_game_creation_function(game_creation_function)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_competitive_tournament(TournamentMode.VS_BEST_OF_EACH_GEN)
    .with_keep_only(30)
    .get()
)

# Population update function: three operators and a geometric selector
get_new_pop_f = (
    PopulationUpdateBuilderStatic()
    .add_operator("es_1_mutation", 0.6, 0.3, 0.5)
    .add_operator("es_2_crossover", 0.3, 0.1)
    .add_operator("es_1_copy", 0.1)
    .add_selector_f(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .get()
)

# Population manager
pop_size = 100
pm = PopulationManager(pop_size, get_model_func, reward_function, update_env_f, get_new_pop_f)

# Run the population manager; run() is unpacked into the last population and the env
num_epochs = 300
last_pop, env = pm.run(num_epochs)

In [None]:
# Plot the first reward of every epoch stored in `env["rewards"]`, where `env` is the
# second value unpacked from pm.run(num_epochs) in the cell above.
plot_rewards(env)

In [None]:
class AgentHuman:
    """Agent whose moves are typed in by a person on standard input."""

    def __init__(self):
        pass

    def predict(self, board):
        """Ask for a move and return it as a one-hot score vector.

        The prompt is repeated until an integer between 0 and 8 is entered, so
        that a typo neither aborts the game with ValueError nor produces an
        all-zero vector (which would let the game choose a cell on the
        human's behalf).

        Args:
            board: current board; not used, present to match the agent interface.

        Returns:
            List of 9 ints: 1 at the selected cell index, 0 elsewhere.
        """
        while True:
            raw_move = input("Select move: ")
            try:
                move = int(raw_move)
            except ValueError:
                print("Invalid move: enter an integer between 0 and 8.")
                continue
            if 0 <= move < 9:
                break
            print("Invalid move: enter an integer between 0 and 8.")

        prediction = [1 if i == move else 0 for i in range(9)]
        return prediction

In [None]:
# Interactive match on a fresh board: the human plays as agent_1 and the wrapped first
# entry of `last_pop` plays as agent_2; interactive=True makes play() print the board.
# presumably last_pop[0] is the best individual of the last population -- TODO confirm
game = Tris()
game.play(AgentHuman(), agent_wrapper_func(last_pop[0]), interactive=True)

# Func with context

In [None]:
# Context object handed to the factories, the builders and the population manager below
context = recursively_default_dict()

# Game: GuessPoint factory, wrapped by fwc.func_with_context together with the context
game_creation_func = fwc.func_with_context(
    lambda context: GuessPoint(np.array([0.2,0.8,0.5]), np.array([0.4, 0.5, 10])),
    context=context,
)

# Model: feed-forward network factory (sigmoid / identity activations), wrapped the same way
layer_sizes = [3, 5, 3]
get_model_func = fwc.func_with_context(
    lambda context: FeedForwardNetwork(layer_sizes, ActivationFunctions.get_sigmoid(), ActivationFunctions.get_id()),
    context=context,
)

# Game-Model interface: the agent simply forwards its inputs to model.predict
predict_func = lambda model, inputs: model.predict(inputs)
agent_wrapper_func = get_agent_wrapper_func(predict_func)

# Reward function, built from the game factory and the agent wrapper
reward_function = (
    RewardBuilderGame()
    .with_game_creation_function(game_creation_func)
    .with_agent_wrapper_func(agent_wrapper_func)
    .with_context(context)
    .get()
)

# Population update function: three operators and a geometric selector
mutation_perc = 0.6
crossover_perc = 0.3
copy_perc = 0.1
get_new_pop_f = (
    PopulationUpdateBuilderStatic()
    .add_operator("es_1_mutation", mutation_perc, 0.3, 0.3)
    .add_operator("es_2_crossover", crossover_perc, 0.8)
    .add_operator("es_1_copy", copy_perc)
    .add_selector_f(SelectorFunctionFactory.get_geometric_selector_function(0.3))
    .with_context(context)
    .get()
)

# Population manager
pop_size = 500
pm = PopulationManager(pop_size, get_model_func, reward_function, get_new_pop_f, context)

# Run the population manager and keep what run() returns
num_epochs = 10
last_pop = pm.run(num_epochs)