In [30]:
from functools import reduce
from collections import namedtuple
from dataclasses import dataclass
from typing import Callable
from copy import deepcopy
from tqdm import tqdm

from pprint import pprint

import numpy as np
import random
import sys

## Evolutionary parameters

In [31]:
POPULATION_SIZE = 30         # individuals kept after each survivor selection
TOURNAMENT_SIZE = 10         # individuals sampled (with replacement) per parent tournament
MUTATION_PROBABILITY = .5    # chance an offspring is produced by mutation instead of crossover
N_STRATEGIES = 3             # number of base strategies an agent mixes
# Total offspring budget. Kept as an int: it is a count, and a float here
# leaks into the derived number of generations (e.g. "33.0 generations").
TOT_N_OFFSPRINGS = 1_000
λ = 30                       # offspring generated per generation
τ = 1/np.sqrt(N_STRATEGIES)  # learning rate of the log-normal σ update used by the mutation

TRAINING_GAMES = 100         # games played for one fitness evaluation
ROWS = 4                     # rows of the Nim board used during training

## The *Nim* and *Nimply* classes

In [32]:
# A move: take `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")


class Nim:
    """State of a Nim game.

    The board starts with ``num_rows`` rows holding 1, 3, 5, ... objects.
    ``k`` is the largest number of objects that a single move may remove
    (effectively unlimited by default).
    """

    def __init__(self, num_rows: int, k: int = sys.maxsize) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply ``ply`` to the board in place.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must remove something: zero would let a player pass and a
        # negative count would add objects back to the row.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert num_objects <= self._k
        self._rows[row] -= num_objects

## Functions for optimal strategies

In [33]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum (bitwise XOR) of all row sizes of ``state``.

    Folding with integer XOR works for row sizes of any magnitude and
    yields 0 for a board without rows.
    """
    return int(reduce(lambda acc, size: acc ^ size, state.rows, 0))

def advanced_nim_sum(state: Nim) -> int:
    """Return the nim-sum of the row sizes reduced modulo ``k + 1``.

    Each row size is first taken modulo ``state._k + 1`` (``_k`` being the
    per-move removal limit of the game) and the reduced values are then
    XOR-ed together. Yields 0 for a board without rows.
    """
    modulus = state._k + 1
    return int(reduce(lambda acc, size: acc ^ (size % modulus), state.rows, 0))

def analize(raw: Nim) -> dict:
    """Evaluate every legal move available in ``raw``.

    Returns a dict whose ``"possible_moves"`` entry maps each legal
    ``Nimply`` to the ``advanced_nim_sum`` of the board obtained by playing
    that move on a copy of ``raw`` (``raw`` itself is left untouched).
    """
    def resulting_nim_sum(move: Nimply) -> int:
        # Play the move on a private copy so the caller's board is preserved.
        board = deepcopy(raw)
        board.nimming(move)
        return advanced_nim_sum(board)

    legal_moves = (
        Nimply(row, taken)
        for row, size in enumerate(raw.rows)
        for taken in range(1, size + 1)
        if taken <= raw._k  # respect the per-move removal limit
    )
    return {"possible_moves": {move: resulting_nim_sum(move) for move in legal_moves}}

## Strategies

In [34]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random legal number of objects from it."""
    non_empty_rows = [index for index, size in enumerate(state.rows) if size > 0]
    chosen_row = random.choice(non_empty_rows)
    # Never take more than the row holds or than the per-move limit allows.
    upper_bound = min(state._k, state._rows[chosen_row])
    return Nimply(chosen_row, random.randint(1, upper_bound))

def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    "Allowed" means bounded both by the row size and by the per-move limit
    ``state._k``, so the returned move is always accepted by ``Nim.nimming``.

    Raises:
        ValueError: if no legal move exists.
    """
    for row, size in enumerate(state.rows):
        takeable = min(size, state._k)
        if takeable >= 1:
            return Nimply(row, takeable)
    raise ValueError("no legal move available")

def optimal(state: Nim) -> Nimply:
    """Play a move that leaves a zero nim-sum, or a random move if none exists.

    When several moves lead to a zero ``advanced_nim_sum`` one of them is
    chosen at random.
    """
    outcomes = analize(state)["possible_moves"]
    zeroing_moves = [move for move, resulting_sum in outcomes.items() if resulting_sum == 0]
    if zeroing_moves:
        return random.choice(zeroing_moves)
    return pure_random(state)

def agent(genotype) -> Callable:
    """Build a player that mixes the strategies in ``strategy_mapping``.

    The absolute values of the first ``N_STRATEGIES`` genes are normalised
    into a probability distribution; at every move one strategy is drawn
    from it and asked for the ply.

    Returns:
        A callable taking a ``Nim`` state and returning a ``Nimply``.
    """
    def agent_style(state: Nim) -> Nimply:
        weights = np.abs(np.asarray(genotype[:N_STRATEGIES], dtype=float))
        total = weights.sum()
        if np.isfinite(total) and total > 0:
            probabilities = weights / total
        else:
            # Degenerate genotype (all-zero or non-finite weights): there is
            # no preference to honour, so pick uniformly instead of failing.
            probabilities = np.full(len(weights), 1 / len(weights))

        # Roulette-wheel selection. Only the interior boundaries of the
        # cumulative distribution are compared against the draw, so the
        # result is always a valid index even when floating-point rounding
        # makes the probabilities sum to slightly less than 1.
        boundaries = np.cumsum(probabilities)[:-1]
        draw = random.random()
        chosen_strategy = sum(1 for boundary in boundaries if draw >= boundary)

        return strategy_mapping[chosen_strategy](state)
    return agent_style

# Index -> strategy lookup; the index of a strategy is also the position of
# its weight inside a genotype.
strategy_mapping = dict(enumerate((pure_random, gabriele, optimal)))

## Evolutionary methods

In [35]:
@dataclass
class Individual:
    """A member of the evolving population: a genotype and its measured fitness."""
    # First N_STRATEGIES entries: strategy weights (normalised by `agent`);
    # remaining N_STRATEGIES entries: the σ values used by `mutate`.
    # NOTE: annotated as a list, but this notebook stores NumPy arrays here.
    genotype: list[float]
    # Win rate returned by `fitness`; set to None until the individual is evaluated.
    fitness: float

def select_parent(pop) -> Individual:
    """Tournament selection.

    Draws ``TOURNAMENT_SIZE`` individuals from ``pop`` (with replacement)
    and returns the one with the highest fitness; on ties the first one
    drawn wins.
    """
    contenders = (random.choice(pop) for _ in range(TOURNAMENT_SIZE))
    return max(contenders, key=lambda contender: contender.fitness)

def mutate(ind: Individual) -> Individual:
    """Return a mutated, not yet evaluated copy of ``ind``.

    The σ genes (second half of the genotype) are all rescaled by one shared
    log-normal factor; the strategy weights (first half) are then perturbed
    with Gaussian noise whose standard deviations are the updated σ values.
    ``ind`` itself is not modified.
    """
    child = deepcopy(ind)
    weight_genes = slice(None, N_STRATEGIES)
    sigma_genes = slice(N_STRATEGIES, None)

    # Step 1: self-adapt the step sizes with a single random factor.
    scale_factor = np.exp(τ*np.random.normal(loc=0, scale=1))
    child.genotype[sigma_genes] = child.genotype[sigma_genes]*scale_factor

    # Step 2: move each weight by noise drawn with its own (new) σ.
    noise = np.random.normal(loc=0, scale=child.genotype[sigma_genes])
    child.genotype[weight_genes] = noise + ind.genotype[weight_genes]

    # The copied fitness no longer describes the new genotype.
    child.fitness = None
    return child

def one_cut_xover(ind1: Individual, ind2: Individual) -> Individual:
    """One-point crossover returning a not yet evaluated offspring.

    Genes before the cut come from ``ind1``, genes from the cut onwards come
    from ``ind2``. The cut always falls among the first ``N_STRATEGIES``
    genes, so every σ gene is inherited from ``ind2``; a cut at 0 yields a
    full copy of ``ind2``'s genes.
    """
    cut_point = random.randint(0, N_STRATEGIES-1)
    head = ind1.genotype[:cut_point]
    tail = ind2.genotype[cut_point:]
    child_genotype = np.concatenate((head, tail), axis=None)
    assert len(child_genotype) == N_STRATEGIES*2
    return Individual(genotype=child_genotype, fitness=None)

def fitness(genotype) -> float:
    """Return the fraction of ``TRAINING_GAMES`` won by the agent built from ``genotype``.

    The opponent cycles through ``strategy_mapping`` from game to game and
    the starting player alternates (the agent moves first in odd-numbered
    games). The player who removes the last object wins.
    """
    contender = agent(genotype)
    victories = 0
    for game in range(TRAINING_GAMES):
        board = Nim(ROWS)
        players = (contender, strategy_mapping[game % N_STRATEGIES])
        # Index into `players` of whoever moves next: 0 is the agent.
        turn = 1 if game % 2 == 0 else 0

        while board:
            board.nimming(players[turn](board))
            turn = 1 - turn

        # `turn` was flipped after the final move, so 1 means the agent moved last.
        if turn == 1:
            victories += 1

    return victories/TRAINING_GAMES


## Generating starting population

In [36]:
# Initial genotype layout: N_STRATEGIES random weights in [0, 1) followed by
# N_STRATEGIES mutation step sizes (σ), all starting at 1.
population = [
    Individual(
        fitness=None,
        genotype=np.concatenate(
            ([random.random() for _ in range(N_STRATEGIES)], np.ones(N_STRATEGIES)),
            axis=None,
        ),
    )
    for _ in range(POPULATION_SIZE)
]

# Evaluate only once the whole population has been generated.
for individual in population:
    individual.fitness = fitness(individual.genotype)

# print(f"best individuals:{sorted(population, key=lambda i: i.fitness, reverse=True)[:5]}")

## Evolutionary algorithm

In [37]:
# Number of generations that fit in the offspring budget, as an int so the
# progress label does not read e.g. "33.0 generations".
n_generations = int(TOT_N_OFFSPRINGS // λ)

for step in tqdm(range(n_generations), desc=f"Producing {n_generations} generations"):
    # Generate λ offspring, each one by either mutation or crossover
    offspring = list()
    for counter in range(λ):
        if random.random() < MUTATION_PROBABILITY:
            # mutation of a single tournament winner
            p = select_parent(population)
            o = mutate(p)
        else:
            # crossover of two independently selected tournament winners
            p1 = select_parent(population)
            p2 = select_parent(population)
            o = one_cut_xover(p1, p2)
        offspring.append(o)

    for i in offspring:
        i.fitness = fitness(i.genotype)

    # Survivor selection: parents and offspring compete together and the
    # POPULATION_SIZE fittest are kept (the sort is stable, so on ties
    # individuals already in the population stay ahead of new offspring).
    population.extend(offspring)
    population.sort(key=lambda i: i.fitness, reverse=True)
    population = population[:POPULATION_SIZE]

    # tqdm.write prints without breaking the progress bar, unlike print().
    tqdm.write(f"best individual:{population[0].genotype}, fitness={population[0].fitness}")

Producing 33.0 generations:   3%|▎         | 1/33 [00:04<02:19,  4.35s/it]

best individual:[0.00487675 0.1968242  0.72969818 1.         1.         1.        ], fitness=0.76


Producing 33.0 generations:   6%|▌         | 2/33 [00:09<02:22,  4.60s/it]

best individual:[0.00487675 0.1968242  0.72969818 1.         1.         1.        ], fitness=0.76


Producing 33.0 generations:   9%|▉         | 3/33 [00:14<02:26,  4.88s/it]

best individual:[0.00487675 0.1968242  0.72969818 1.         1.         1.        ], fitness=0.76


Producing 33.0 generations:  12%|█▏        | 4/33 [00:19<02:23,  4.95s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.8


Producing 33.0 generations:  15%|█▌        | 5/33 [00:24<02:24,  5.16s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.8


Producing 33.0 generations:  18%|█▊        | 6/33 [00:30<02:26,  5.43s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.8


Producing 33.0 generations:  21%|██        | 7/33 [00:36<02:26,  5.64s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.81


Producing 33.0 generations:  24%|██▍       | 8/33 [00:42<02:23,  5.75s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.82


Producing 33.0 generations:  27%|██▋       | 9/33 [00:48<02:17,  5.74s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.82


Producing 33.0 generations:  30%|███       | 10/33 [00:54<02:12,  5.77s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.82


Producing 33.0 generations:  33%|███▎      | 11/33 [01:00<02:09,  5.90s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.82


Producing 33.0 generations:  36%|███▋      | 12/33 [01:06<02:04,  5.94s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.82


Producing 33.0 generations:  39%|███▉      | 13/33 [01:12<01:57,  5.88s/it]

best individual:[0.00487675 0.1968242  2.37988695 1.03272284 1.03272284 1.03272284], fitness=0.82


Producing 33.0 generations:  39%|███▉      | 13/33 [01:15<01:56,  5.83s/it]


KeyboardInterrupt: 