Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


In [71]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
import math
import numpy as np
import random

## The *Nim* and *Nimply* classes

In [72]:
# A single move: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])
# A candidate solution: a `genome` together with the `fitness` assigned to it.
Individual = namedtuple("Individual", ["genome", "fitness"])

In [73]:
class Nim:
    """Board of a Nim game.

    The board has ``num_rows`` rows; row ``i`` starts with ``2 * i + 1``
    objects. When ``k`` is given, a single move may remove at most ``k``
    objects.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [2 * position + 1 for position in range(num_rows)]
        self._k = k

    def __bool__(self):
        # The board is truthy for as long as at least one object is left.
        remaining = sum(self._rows)
        return remaining > 0

    def __str__(self):
        counts = " ".join(map(str, self._rows))
        return "<" + counts + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the number of objects in each row."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply ``ply`` (a ``(row, num_objects)`` pair) to the board in place.

        The move is checked with ``assert``: the row must hold at least
        ``num_objects`` objects and, when ``k`` is set, ``num_objects`` must
        not exceed it.
        """
        row, num_objects = ply
        assert num_objects <= self._rows[row]
        assert self._k is None or self._k >= num_objects
        self._rows[row] = self._rows[row] - num_objects


In [74]:
def pure_random(Individual: Nim) -> Nimply:
    """Return a random legal move for the given board.

    A non-empty row is chosen with ``random.choice`` and the number of
    objects to remove is drawn with ``random.randint`` between 1 and the
    size of that row.

    NOTE(review): the parameter is the game state (annotated ``Nim``; only
    its ``rows`` are read). Its name shadows the module-level ``Individual``
    namedtuple inside this function.
    """
    playable = []
    for position, count in enumerate(Individual.rows):
        if count > 0:
            playable.append(position)
    row = random.choice(playable)
    return Nimply(row, random.randint(1, Individual.rows[row]))


In [75]:
def _ply_from_fractions(rows, index_fraction, quantity_fraction):
    """Translate two fractions into a concrete move on ``rows``.

    ``index_fraction`` selects among the non-empty rows (scaled by how many
    of them there are and floored); ``quantity_fraction`` selects how much of
    the chosen row to remove (floored, but never less than one object).

    The selected position is clamped to the range of non-empty rows, so a
    fraction of 1.0 or more picks the last non-empty row instead of yielding
    an unrelated (possibly empty) row index.

    Raises:
        ValueError: if every row is empty, i.e. there is no legal move.
    """
    non_empty = [position for position, count in enumerate(rows) if count]
    if not non_empty:
        raise ValueError("cannot choose a move: every row is empty")
    wanted = math.floor(len(non_empty) * index_fraction)
    wanted = min(max(wanted, 0), len(non_empty) - 1)
    row = non_empty[wanted]
    quantity = max(math.floor(rows[row] * quantity_fraction), 1)
    return Nimply(row, quantity)


def adaptive_maker(genome, type):
    """Build a strategy function ``state -> Nimply`` driven by ``genome``.

    Args:
        genome: mapping holding the parameters of the strategy. The keys
            read depend on ``type``:
            * ``"Simple"``: ``index`` and ``quantity``.
            * ``"Choice"``: ``threshold`` plus ``first_index`` /
              ``first_quantity`` and ``second_index`` / ``second_quantity``.
            * ``"NN"``: ``weights1``, ``bias1``, ``weights2``, ``bias2``.
        type: one of ``"Simple"``, ``"Choice"`` or ``"NN"``.

    Returns:
        A function taking a game state (anything exposing ``rows``) and
        returning the move to play.

    Raises:
        ValueError: if ``type`` is not one of the supported names.
    """
    if type == "Simple":
        def adaptive(state):
            return _ply_from_fractions(
                state.rows, genome["index"], genome["quantity"]
            )
        return adaptive

    if type == "Choice":
        def adaptive(state):
            rows = np.array(state.rows)
            # NOTE(review): the original divisor was written as
            # `len(2*_rows - 1)`, which is simply the number of rows; the
            # value is kept as is. Confirm that dividing by the row count
            # (rather than by each row's initial size) is what was intended.
            if max(rows / len(rows)) > genome["threshold"]:
                return _ply_from_fractions(
                    state.rows, genome["first_index"], genome["first_quantity"]
                )
            return _ply_from_fractions(
                state.rows, genome["second_index"], genome["second_quantity"]
            )
        return adaptive

    if type == "NN":
        def adaptive(state):
            rows = np.array(state.rows)
            # Two dense layers, each followed by a ReLU.
            hidden = np.maximum(np.dot(rows, genome["weights1"]) + genome["bias1"], 0)
            output = np.maximum(np.dot(hidden, genome["weights2"]) + genome["bias2"], 0)
            # Cap the two outputs so they can be used as fractions; the row
            # fraction stays strictly below 1 so its floor remains in range.
            index_fraction = np.minimum(output[0], 0.9999999999999999)
            quantity_fraction = np.minimum(output[1], 1.0)
            return _ply_from_fractions(state.rows, index_fraction, quantity_fraction)
        return adaptive

    raise ValueError(f"unknown strategy type: {type!r}")



In [76]:
def plays(strategy, n=5):
    """Play one full game on a fresh ``Nim(n)`` board.

    ``strategy`` is indexable by 0 and 1 and holds the two players' move
    functions; player 0 moves first and the players alternate until the
    board is empty.

    Returns the index of the player whose turn it would be once the board is
    empty, i.e. the player who did not remove the last object.
    """
    board = Nim(n)
    turn = 0
    while board:
        board.nimming(strategy[turn](board))
        turn = 1 - turn

    return turn

In [77]:
def fitness_maker(type, n, random):
    """Build the fitness function used by the evolution strategy.

    Args:
        type: strategy type forwarded to ``adaptive_maker``.
        n: number of games played per individual when ``random`` is true
            (unused by the round-robin variant).
        random: when true, individuals are scored one at a time against
            ``pure_random``; otherwise a whole population is scored by
            playing its members against each other.

    Returns:
        * ``random`` true: ``fitness(individual) -> Individual`` whose
          fitness is the mean of the values returned by ``plays`` over ``n``
          games. The candidate sits at index 1 (it moves second), so this is
          the share of games in which ``plays`` returned 1.
        * ``random`` false: ``fitness(population) -> list`` of new
          ``Individual`` tuples whose fitness is the number of games won in
          a round robin over every ordered pair ``(i, j)``, ``i != j``.
    """
    if random:
        def fitness(genome):
            strategy = (pure_random, adaptive_maker(genome.genome, type))
            wins = sum(plays(strategy) for _ in range(n))
            return Individual(genome.genome, wins / n)
        return fitness

    def fitness(offspring):
        # Build each strategy once instead of once per game: the closures
        # only read their genome, so they can be shared across games.
        players = [adaptive_maker(individual.genome, type) for individual in offspring]
        wins = [0] * len(players)
        for i, first in enumerate(players):
            for j, second in enumerate(players):
                if i == j:
                    continue
                # `plays` returns 0 when the first-moving player wins.
                if plays((first, second)) == 0:
                    wins[i] += 1
                else:
                    wins[j] += 1
        return [
            Individual(individual.genome, score)
            for individual, score in zip(offspring, wins)
        ]
    return fitness

def _clamp_unit(key, value, index_keys):
    """Clamp a scalar gene to [0, 1]; genes in ``index_keys`` stay below 1."""
    if value < 0:
        return 0
    if value >= 1:
        # Row-selection genes must stay strictly below 1 so that flooring
        # `n * gene` never reaches `n`.
        return 0.9999999999999999 if key in index_keys else 1.0
    return value


def _self_adapted_step_sizes(parameters, mu, sigma):
    """Return the step sizes after their own log-normal perturbation."""
    return parameters * np.exp(
        1 / np.sqrt(2) * np.random.normal(mu, sigma, parameters.shape)
    )


def _tweak_scalar_genome(genome, index_keys, self_adaptation, mu, sigma):
    """Mutate a genome made of scalar genes in [0, 1] (Simple / Choice)."""
    new_genome = dict()
    if not self_adaptation:
        # One Gaussian step per gene, all drawn with the same (mu, sigma).
        steps = np.random.normal(mu, sigma, len(genome))
        for step, (key, value) in zip(steps, genome.items()):
            new_genome[key] = _clamp_unit(key, value + step, index_keys)
        return new_genome

    # Self-adaptation: mutate the per-gene step sizes first, then use the
    # new step sizes as the standard deviation of each gene's mutation.
    step_sizes = _self_adapted_step_sizes(genome["parameters"], mu, sigma)
    new_genome["parameters"] = step_sizes
    position = 0
    for key, value in genome.items():
        if key == "parameters":
            continue
        mutated = (value + np.random.normal(0, step_sizes[position], 1))[0]
        new_genome[key] = _clamp_unit(key, mutated, index_keys)
        position += 1
    return new_genome


def _tweak_nn_genome(genome, self_adaptation, mu, sigma):
    """Mutate a genome made of weight/bias arrays (NN)."""
    new_genome = dict()
    if not self_adaptation:
        for key, values in genome.items():
            new_genome[key] = values + np.random.normal(mu, sigma, values.shape)
        return new_genome

    step_sizes = _self_adapted_step_sizes(genome["parameters"], mu, sigma)
    new_genome["parameters"] = step_sizes
    # The step sizes are laid out as the row-major flattening of weights1,
    # bias1, weights2 and bias2, in that order; offsets follow from the
    # array sizes instead of being hard-coded.
    offset = 0
    for key in ("weights1", "bias1", "weights2", "bias2"):
        values = genome[key]
        scales = step_sizes[offset:offset + values.size].reshape(values.shape)
        # One vectorised draw per array; each element gets its own scale.
        new_genome[key] = values + np.random.normal(mu, scales)
        offset += values.size
    return new_genome


def tweak_maker(type, self_adaptation, hidden):
    """Build the mutation operator for genomes of the given ``type``.

    Args:
        type: ``"Simple"``, ``"Choice"`` or ``"NN"``.
        self_adaptation: when true, genomes carry a ``"parameters"`` array
            of per-gene step sizes that is mutated along with the genes.
        hidden: hidden-layer width. Kept for interface compatibility; the
            NN mutation derives the layout from the genome's array shapes.

    Returns:
        ``tweak(old_genome, mu=0, sigma=1) -> Individual``. ``old_genome`` is
        an individual whose ``genome`` is read and left untouched; the result
        wraps the mutated genome with fitness 0.

    Raises:
        ValueError: if ``type`` is not one of the supported names.
    """
    if type == "Simple":
        def tweak_simple(old_genome, mu=0, sigma=1):
            new_genome = _tweak_scalar_genome(
                old_genome.genome, ("index",), self_adaptation, mu, sigma
            )
            return Individual(new_genome, 0)
        return tweak_simple

    if type == "Choice":
        def tweak_choice(old_genome, mu=0, sigma=1):
            new_genome = _tweak_scalar_genome(
                old_genome.genome,
                ("first_index", "second_index"),
                self_adaptation,
                mu,
                sigma,
            )
            return Individual(new_genome, 0)
        return tweak_choice

    if type == "NN":
        def tweak_nn(old_genome, mu=0, sigma=1):
            new_genome = _tweak_nn_genome(
                old_genome.genome, self_adaptation, mu, sigma
            )
            return Individual(new_genome, 0)
        return tweak_nn

    raise ValueError(f"unknown strategy type: {type!r}")

In [78]:
def start(fitness, type, self_adaptation, hidden):
    """Create one random individual of the given ``type``.

    Args:
        fitness: per-individual fitness function, or ``None`` to return the
            individual unevaluated (fitness 0).
        type: ``"Simple"``, ``"Choice"`` or ``"NN"``.
        self_adaptation: when true, the genome starts with a ``"parameters"``
            array of squared Gaussian samples, one per gene.
        hidden: hidden-layer width of the ``"NN"`` genome.

    Returns:
        ``fitness(Individual(genome, 0))`` when ``fitness`` is given,
        otherwise ``Individual(genome, 0)``.

    Raises:
        ValueError: if ``type`` is not one of the supported names.
    """
    if type == "Simple":
        scalar_genes = ("index", "quantity")
    elif type == "Choice":
        scalar_genes = (
            "first_index",
            "second_index",
            "first_quantity",
            "second_quantity",
            "threshold",
        )
    elif type == "NN":
        scalar_genes = None
    else:
        raise ValueError(f"unknown strategy type: {type!r}")

    # "parameters" is inserted first on purpose: insertion order is part of
    # the genome layout (it is the first key when the genome is iterated).
    genome = dict()
    if scalar_genes is not None:
        if self_adaptation:
            genome["parameters"] = np.random.normal(0, 0.5, len(scalar_genes)) ** 2
        for name in scalar_genes:
            genome[name] = random.random()
    else:
        if self_adaptation:
            # One step size per weight/bias: 5*hidden + hidden + 2*hidden + 2.
            genome["parameters"] = np.random.normal(0, 0.1, 8 * hidden + 2) ** 2
        # The input layer is fixed at 5 values, one per board row.
        genome["weights1"] = np.random.normal(0, 0.5, (5, hidden))
        genome["bias1"] = np.random.normal(0, 0.1, (hidden,))
        genome["weights2"] = np.random.normal(0, 0.5, (hidden, 2))
        genome["bias2"] = np.random.normal(0, 0.1, (2))

    individual = Individual(genome, 0)
    return individual if fitness is None else fitness(individual)

In [79]:
def es_(type="Simple", self_adaptation=False, comma=False, iteration=30, lambda_=150, n=1000, mu=1, hidden=7, fitness_random=False):
    """Run a (mu + lambda) or (mu, lambda) evolution strategy.

    Args:
        type: strategy/genome type (``"Simple"``, ``"Choice"`` or ``"NN"``).
        self_adaptation: whether genomes carry self-adapted step sizes.
        comma: when true, only the offspring compete for survival (comma
            selection); otherwise parents and offspring compete together.
        iteration: number of generations.
        lambda_: number of offspring produced per generation.
        n: number of games per evaluation against the random player.
        mu: number of individuals kept after each generation.
        hidden: hidden-layer width for the ``"NN"`` type.
        fitness_random: score individuals against the random player even
            when ``mu > 1``; with ``mu == 1`` this is always the case.
            Otherwise the population is scored by playing against itself.

    Returns:
        The genome of the first individual of the final population (the
        fittest one of the last generation when ``iteration > 0``).
    """
    # Individuals are scored one by one against the random player, or all
    # together by round robin; the choice is made once and reused below.
    per_individual = bool(mu == 1 or fitness_random)
    fitness = fitness_maker(type=type, n=n, random=per_individual)
    tweak = tweak_maker(type=type, self_adaptation=self_adaptation, hidden=hidden)

    if per_individual:
        population = [
            start(fitness=fitness, type=type, self_adaptation=self_adaptation, hidden=hidden)
            for _ in range(mu)
        ]
    else:
        population = [
            start(fitness=None, type=type, self_adaptation=self_adaptation, hidden=hidden)
            for _ in range(mu)
        ]
        population = fitness(population)

    for generation in range(iteration):
        # Sample parents from the survivors actually present: with comma
        # selection and lambda_ < mu the population holds fewer than mu
        # individuals, so indexing up to mu - 1 would go out of range.
        offsprings = [tweak(random.choice(population)) for _ in range(lambda_)]

        if comma:
            population = offsprings
        else:
            population.extend(offsprings)

        # Survivors are re-evaluated together with the offspring each generation.
        if per_individual:
            population = [fitness(o) for o in population]
        else:
            population = fitness(population)
        population.sort(key=lambda x: x.fitness, reverse=True)
        population = population[:mu]

        print(f"Itereation: {generation + 1} \t Best fitness: {population[0].fitness}")
    return population[0].genome

In [80]:
# Evolve a self-adaptive "Choice" strategy, scoring individuals against the
# random player, and show the genome that comes out on top.
best_genome = es_(
    type="Choice",
    iteration=10,
    lambda_=100,
    self_adaptation=True,
    mu=7,
    hidden=7,
    fitness_random=True,
)
print(f"Best Genome: {best_genome}")

Itereation: 1 	 Best fitness: 0.793
Itereation: 2 	 Best fitness: 0.783
Itereation: 3 	 Best fitness: 0.813
Itereation: 4 	 Best fitness: 0.806
Itereation: 5 	 Best fitness: 0.795
Itereation: 6 	 Best fitness: 0.803
Itereation: 7 	 Best fitness: 0.814
Itereation: 8 	 Best fitness: 0.808
Itereation: 9 	 Best fitness: 0.803
Itereation: 10 	 Best fitness: 0.8
Itereation: 11 	 Best fitness: 0.809
Itereation: 12 	 Best fitness: 0.8
Itereation: 13 	 Best fitness: 0.803
Itereation: 14 	 Best fitness: 0.804
Itereation: 15 	 Best fitness: 0.816
Best Genome: {'parameters': array([1.28315537e-02, 1.80993522e-01, 1.60437930e-04, 7.53248882e-03,
       4.18602052e-01]), 'first_index': 0.3352922488469692, 'second_index': 0.5803510827285104, 'first_quantity': 0.9081066313787659, 'second_quantity': 0.8645618551272221, 'threshold': 0.10783020861040646}


In [81]:
# Let INFO-level records through the root logger so the game trace is shown.
logging.getLogger().setLevel(logging.INFO)

# Player 0 uses the evolved "Choice" strategy; player 1 moves at random.
# NOTE(review): here the evolved strategy moves first, whereas the fitness
# against the random player evaluates it as the second player - confirm this
# is intended.
strategy = (adaptive_maker(best_genome, type="Choice"), pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
# Alternate moves until the board is empty, logging every ply and board state.
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    player = 1 - player
# `player` was flipped after the final move, so the winner reported here is
# the player who did not take the last object.
logging.info(f"status: Player {player} won!")

INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=2)
INFO:root:status: <1 1 5 7 9>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=5)
INFO:root:status: <1 1 0 7 9>
INFO:root:ply: player 0 plays Nimply(row=1, num_objects=1)
INFO:root:status: <1 0 0 7 9>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 0 0 7 9>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=6)
INFO:root:status: <0 0 0 1 9>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=7)
INFO:root:status: <0 0 0 1 2>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 0 0 0 2>
INFO:root:ply: player 1 plays Nimply(row=4, num_objects=2)
INFO:root:status: <0 0 0 0 0>
INFO:root:status: Player 0 won!
