In [1]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor

# Set the root logger to INFO so that the logging.info(...) progress messages
# emitted later in the notebook are shown.
logging.getLogger().setLevel(logging.INFO)

## The *Nim* and *Nimply* classes

In [21]:
# A single move ("ply"): `row` is the index of the row to take from and
# `num_objects` is how many objects to remove from that row.
Nimply = namedtuple("Nimply", "row, num_objects")

In [22]:
class Nim:
    """Board of a Nim game: a list of rows, each holding a number of objects.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects, so ``Nim(3)`` starts
    as ``<1 3 5>``. ``k``, when not ``None``, is the maximum number of objects
    a single move may remove. An instance is truthy while any object is left.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board with ``num_rows`` rows and an optional per-move cap ``k``."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object remains on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Current row sizes as a tuple (a copy, so callers cannot edit the board)."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Maximum number of objects removable in one move, or ``None`` for no cap."""
        return self._k

    # The annotation is a string so that defining the class does not require
    # the name `Nimply` to exist yet; any (row, num_objects) pair is accepted.
    def nimming(self, ply: "Nimply") -> None:
        """Apply a move in place, removing ``num_objects`` objects from ``row``.

        Raises:
            AssertionError: if the row holds fewer objects than requested, if
                fewer than one object is requested, or if the request exceeds
                the cap ``k``.
        """
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        # A move must take at least one object; without this check a player
        # could pass (0) or even add objects to a row (negative amount).
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [23]:
def evaluate(strategyA: Callable, strategyB: Callable, num_matches = 1, nim_size = 3, k = None) -> float:
    """Play ``num_matches`` games between two strategies and score ``strategyA``.

    Every game starts from a fresh ``Nim(nim_size, k)`` board and ``strategyB``
    makes the opening move. The strategy that empties the board wins.

    Returns:
        The fraction of games in which ``strategyA`` made the last move.

    Raises:
        ZeroDivisionError: if ``num_matches`` is 0.
    """
    contenders = (strategyA, strategyB)
    victories = 0

    for _ in range(num_matches):
        board = Nim(nim_size, k)
        turn = 1  # index into `contenders`: strategyB opens the game
        while board:
            board.nimming(contenders[turn](board))
            turn = 1 - turn
        # `turn` has already been handed over after the final move, so
        # turn == 1 means strategyA (index 0) emptied the board.
        victories += int(turn == 1)

    return victories / num_matches

## Random strategy

In [29]:
# Choose a random non-empty row and remove a random number of objects between 1 and min(k, row_objects), inclusive (or up to row_objects when k is None)

def random_strategy(state: "Nim"):
    """Return a random legal move for ``state`` as a ``(row, num_objects)`` tuple.

    A non-empty row is chosen at random, then a number of objects between 1 and
    the row size (capped at ``state.k`` when ``k`` is not ``None``), inclusive.

    Raises:
        IndexError: if every row is empty (``random.choice`` on an empty list).
    """
    non_empty_rows = [idx for idx, count in enumerate(state.rows) if count > 0]
    row = random.choice(non_empty_rows)

    # `is None` rather than `!= None`: identity is the idiomatic singleton test.
    if state.k is None:
        limit = state.rows[row]
    else:
        limit = min(state.rows[row], state.k)
    num_objects = random.randint(1, limit)

    return (row, num_objects)

## Task 3.2: Evolved Strategy

In [35]:
from statistics import *

def my_xor(collection):
    """Return the bitwise XOR of all items in ``collection`` (the nim-sum).

    An empty collection yields 0, the identity element of XOR, instead of
    failing while unpacking an empty sequence.
    """
    from functools import reduce

    values = list(collection)
    if not values:
        return 0
    # A fold yields the final value directly, without building the list of
    # every intermediate result.
    return reduce(xor, values)

# Functions applied to the tuple of row sizes of a candidate board; genome[i]
# is the weight given to RULES[i] in evolvable_strategy, so a genome has one
# entry per rule.
RULES = [sum, min, max, mean, stdev] # alternative rule set that also includes the nim-sum: [my_xor, sum, min, max, mean, stdev]

def evolvable_strategy(genome):
    """Build a Nim strategy whose move choice is driven by ``genome``.

    ``genome`` holds one weight per entry of ``RULES``. The returned callable
    takes a board and returns a ``(row, num_objects)`` move: the first legal
    move whose resulting board scores exactly 0, where the score is the
    weighted sum of every rule applied to the resulting row sizes. If no move
    scores 0, the first legal move is returned.
    """
    def strategy(state: Nim):
        # Legal moves in row order, smallest removal first, honouring the cap k.
        candidates = []
        for row, count in enumerate(state.rows):
            for amount in range(1, count + 1):
                if state.k is None or amount <= state.k:
                    candidates.append((row, amount))

        for move in candidates:
            # Try the move on a copy so the real board is left untouched.
            board = deepcopy(state)
            board.nimming(move)

            score = sum(weight * rule(board.rows) for weight, rule in zip(genome, RULES))
            if score == 0:
                return move

        return candidates[0]

    return strategy

def mutation(genome):
    """Return a copy of ``genome`` with one randomly chosen gene ``g`` replaced by ``1 - g``.

    The input list is not modified.
    """
    flip_at = random.randrange(len(genome))
    head = genome[:flip_at]
    flipped = [1 - genome[flip_at]]
    tail = genome[flip_at + 1:]
    return head + flipped + tail

def crossover(genomeA, genomeB):
    """Uniform crossover: build a child by picking each gene from either parent.

    For every position a fresh random number decides whether the gene comes
    from ``genomeA`` or ``genomeB`` (50/50). Drawing the number once for the
    whole genome would make the child a plain copy of one parent, so no
    recombination would ever take place.

    The child has the length of the shorter parent (``zip`` semantics).
    """
    return [
        gene_a if random.random() < 0.5 else gene_b
        for gene_a, gene_b in zip(genomeA, genomeB)
    ]

def tournament(population, tournament_size):
    """Tournament selection: return the fittest of a random sample.

    ``tournament_size`` individuals are drawn from ``population`` with
    replacement and the one with the highest ``fitness`` attribute is returned.
    """
    def by_fitness(individual):
        return individual.fitness

    contestants = random.choices(population, k=tournament_size)
    return max(contestants, key=by_fitness)

def fitness(genome):
    """Estimate how well the strategy encoded by ``genome`` does against random play.

    Five random configurations are drawn (3 to 10 rows; ``k`` is either
    ``None`` or an integer from 1 to 10). On each one the evolved strategy
    plays the random strategy twice, once in each argument position of
    ``evaluate``.

    Returns:
        A tuple ``(win_optimal, win_random)``. ``win_random`` is the fraction
        of those games won by the evolved strategy; ``win_optimal`` is never
        updated here and is therefore always 0.0.
    """
    games = 5
    win_optimal = 0.0  # placeholder: nothing in this function updates it
    win_random = 0.0

    for _ in range(games):
        nim_size = random.randint(3, 10)
        k = random.choice([None, random.randint(1, 10)])

        as_first_arg = evaluate(evolvable_strategy(genome), random_strategy, nim_size=nim_size, k=k)
        # evaluate() scores its first argument, so invert when the evolved strategy is second.
        as_second_arg = 1 - evaluate(random_strategy, evolvable_strategy(genome), nim_size=nim_size, k=k)
        win_random += as_first_arg + as_second_arg

    total_games = 2 * games
    return (win_optimal / total_games, win_random / total_games)
        
def genetic_algorithm(num_gens=100, population_size=10, offspring_size=20,
                      tournament_size=2, steady_state_limit=10,
                      mutation_probability=0.2):
    """Evolve a genome of rule weights and return the strategy built from the best one.

    Each generation creates ``offspring_size`` children by tournament selection
    and crossover, mutates each child with probability ``mutation_probability``,
    and keeps the ``population_size`` fittest of parents plus children. The run
    stops after ``num_gens`` generations, or earlier once the best fitness has
    not improved for ``steady_state_limit`` consecutive generations.

    Called without arguments it uses the original hyper-parameters. Mutation is
    new: the population starts with all-zero genomes, and without it every
    child is identical to its parents, so the search could never leave that
    starting point.

    Returns:
        The callable produced by ``evolvable_strategy`` for the best genome found.

    Raises:
        ValueError: if ``num_gens`` is 0 and ``population_size`` is 0 (there is
            no individual to return).
    """
    Individual = namedtuple('Individual', ('genome', 'fitness'))

    population = []
    for _ in range(population_size):
        genome = [0 for _ in range(len(RULES))]
        population.append(Individual(genome, fitness(genome)))

    best = None
    stale_generations = 0

    for g in range(num_gens):
        offspring = []
        for _ in range(offspring_size):
            p1 = tournament(population, tournament_size=tournament_size)
            p2 = tournament(population, tournament_size=tournament_size)
            child = crossover(p1.genome, p2.genome)
            if random.random() < mutation_probability:
                child = mutation(child)
            offspring.append(Individual(child, fitness(child)))

        # Survivor selection: keep only the fittest of parents + children.
        population = sorted(population + offspring, key=lambda i: i.fitness, reverse=True)[:population_size]
        new_best = population[0]

        # Compare fitness values only; comparing whole Individuals would be
        # decided by the genome list before fitness is even looked at.
        if best is not None and new_best.fitness <= best.fitness:
            logging.info(f'Gen {g+1} skipped because useless')
            stale_generations += 1
        else:
            logging.info(f'Gen {g+1}, found new best individual: {new_best}')
            best = new_best
            stale_generations = 0

        if stale_generations == steady_state_limit:
            break

    if best is None:
        # No generation was run (num_gens == 0): fall back to the initial population.
        best = max(population, key=lambda i: i.fitness)

    return evolvable_strategy(best.genome)

## Oversimplified match

In [36]:
# Lower the root logger to DEBUG so that the board is logged after every move.
logging.getLogger().setLevel(logging.DEBUG)

# Player 0 uses the strategy returned by the genetic algorithm, player 1 the
# random strategy.
strategy = (genetic_algorithm(), random_strategy)

# Board with 7 rows and no cap (k=None) on the objects removed per move.
nim = Nim(7, None)
logging.debug(f"status: Initial board  -> {nim}")
player = 0  # player 0 moves first
while nim:
    ply = strategy[player](nim) 
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
# `player` was switched after the final move, so `1 - player` is the player
# who emptied the board; that player is reported as the winner.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

INFO:root:Gen 1, found new best individual: Individual(genome=[0, 0, 0, 0, 0], fitness=(0.0, 0.7))
INFO:root:Gen 2 skipped because useless
INFO:root:Gen 3 skipped because useless
INFO:root:Gen 4, found new best individual: Individual(genome=[0, 0, 0, 0, 0], fitness=(0.0, 0.8))
INFO:root:Gen 5 skipped because useless
INFO:root:Gen 6 skipped because useless
INFO:root:Gen 7 skipped because useless
INFO:root:Gen 8 skipped because useless
INFO:root:Gen 9 skipped because useless
INFO:root:Gen 10 skipped because useless
INFO:root:Gen 11 skipped because useless
INFO:root:Gen 12 skipped because useless
INFO:root:Gen 13 skipped because useless
INFO:root:Gen 14 skipped because useless
DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13>
DEBUG:root:status: After player 0 -> <0 3 5 7 9 11 13>
DEBUG:root:status: After player 1 -> <0 3 5 7 8 11 13>
DEBUG:root:status: After player 0 -> <0 2 5 7 8 11 13>
DEBUG:root:status: After player 1 -> <0 2 4 7 8 11 13>
DEBUG:root:status: After player 0 -> <0 1 