In [587]:
import logging
import random
from typing import Callable
from collections import namedtuple
from copy import deepcopy
from itertools import accumulate
from operator import xor

In [588]:
# A single move: the index of the row to take from and how many objects to take.
Nimply = namedtuple("Nimply", "row, num_objects")

In [589]:
class Nim:
    """State of a Nim board: a list of rows, each holding a number of objects.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...), so a board with
    ``num_rows`` rows starts with ``num_rows ** 2`` objects in total.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Build the initial board.

        Args:
            num_rows: Number of rows on the board.
            k: Optional upper bound on the objects removable in a single
                move; ``None`` means no bound.
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k
        # Sum of the first `num_rows` odd numbers.
        self._total_elements = num_rows * num_rows

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the row sizes as ``<a b c ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the current row sizes."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Per-move limit given at construction (``None`` when unlimited)."""
        return self._k

    @property
    def total_elements(self) -> int:
        """Number of objects the board held when it was created."""
        return self._total_elements

    def nimming(self, ply: "Nimply") -> None:
        """Apply a move, removing ``num_objects`` objects from row ``row``.

        Args:
            ply: A ``(row, num_objects)`` pair.

        Raises:
            AssertionError: If the row index is outside the board, if fewer
                than one or more than the available objects are requested,
                or if the move exceeds the per-move limit ``k``.
        """
        row, num_objects = ply
        # Reject negative indices too: they would silently address a row
        # counted from the end of the list.
        assert 0 <= row < len(self._rows)
        # A move must remove at least one object; zero would stall the game
        # and a negative amount would add objects to the row.
        assert 1 <= num_objects <= self._rows[row]
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [590]:
def nim_sum(state: "Nim") -> int:
    """Return the bitwise XOR of all row sizes of ``state``.

    Args:
        state: Object exposing the row sizes as the iterable ``rows``.

    Returns:
        The XOR of every row size; ``0`` when there are no rows at all.
    """
    result = 0
    # Folding from 0 (the XOR identity) keeps a board without rows valid.
    for row_size in state.rows:
        result ^= row_size
    return result

In [591]:
def cook_status(state: Nim) -> dict:
    """Summarise a board into the features used by the strategies.

    Returned keys:
        possible_moves: every ``(row, amount)`` pair allowed by the row
            sizes and by ``state.k``.
        activate_rows_number: how many rows still hold at least one object.
        shortest_row / longest_row: index of the non-empty row with the
            fewest / most objects (first one on ties).
        nim_sum: value of ``nim_sum(state)``.
        completion: objects left divided by ``state.total_elements``.
        random: index of a randomly chosen non-empty row.

    A ``brute_force`` entry (each move paired with the nim-sum it leads to)
    is not produced by this function.
    """
    rows = state.rows
    limit = state.k
    # (index, size) pairs of the rows that still hold objects.
    active = [(idx, size) for idx, size in enumerate(rows) if size > 0]

    moves = []
    for idx, size in enumerate(rows):
        for amount in range(1, size + 1):
            if limit is None or amount <= limit:
                moves.append((idx, amount))

    cooked = {}
    cooked['possible_moves'] = moves
    cooked['activate_rows_number'] = len(active)
    cooked['shortest_row'] = min(active, key=lambda pair: pair[1])[0]
    cooked['longest_row'] = max(active, key=lambda pair: pair[1])[0]
    cooked['nim_sum'] = nim_sum(state)
    cooked["completion"] = sum(rows) / state.total_elements
    cooked["random"] = random.choice([idx for idx, _ in active])
    return cooked

# Possible strategies of the other player

In [592]:
def gabriele(state: Nim) -> Nimply:
    """Take as many objects as allowed from the first non-empty row.

    The per-move limit ``state.k`` is honoured, so the returned move is
    always one that ``Nim.nimming`` accepts.

    Raises:
        ValueError: If the board has no objects left (no move exists).
    """
    limit = state.k
    possible_moves = [
        (r, o)
        for r, c in enumerate(state.rows)
        for o in range(1, c + 1)
        if limit is None or o <= limit
    ]
    # Rank by smallest row index first, then by largest amount.
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))

In [593]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random legal amount from it.

    The amount is drawn uniformly between 1 and the row size, capped by the
    per-move limit ``state.k`` when one is set.

    Raises:
        IndexError: If every row is empty (no row can be chosen).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    upper = state.rows[row]
    if state.k is not None:
        # Never ask for more than a single move may remove.
        upper = min(upper, state.k)
    num_objects = random.randint(1, upper)
    return Nimply(row, num_objects)


def randomSmart(state: Nim) -> Nimply:
    """Random player that empties the board when only one row is left.

    The row comes from the ``random`` feature of ``cook_status``. With a
    single non-empty row the whole row is taken; otherwise a random amount
    between 1 and the row size is removed.
    """
    features = cook_status(state)
    chosen_row = features["random"]
    available = state.rows[chosen_row]
    if features["activate_rows_number"] != 1:
        return Nimply(chosen_row, random.randint(1, available))
    # Only one row left: take all of it.
    return Nimply(chosen_row, available)

In [594]:
def optimal_strategy(state: Nim) -> Nimply:
    """Play the first legal move that leaves a nim-sum of zero.

    The candidate moves and the current nim-sum come from ``cook_status``.
    When no legal move reaches a zero nim-sum, a random legal move is
    returned instead.

    Raises:
        IndexError: If there is no legal move at all.
    """
    data = cook_status(state)
    rows = state.rows
    current = data['nim_sum']
    for row, taken in data['possible_moves']:
        # XOR-ing out the old row size and XOR-ing in the new one yields the
        # nim-sum after the move without copying the board.
        if current ^ rows[row] ^ (rows[row] - taken) == 0:
            return Nimply(row, taken)
    return Nimply(*random.choice(data['possible_moves']))

## Possible strategies we can play

In [595]:
def make_strategy(genome: dict) -> Callable:
    """Build a strategy that mixes shortest-row and longest-row moves.

    Args:
        genome: Dictionary whose ``'p'`` entry is the probability of playing
            on the shortest row; otherwise the longest row is used.

    Returns:
        A function mapping a board to a move that removes a random amount
        (between 1 and the row size) from the selected row.
    """
    def evolvable(state: Nim) -> Nimply:
        features = cook_status(state)
        feature_key = 'shortest_row' if random.random() < genome['p'] else 'longest_row'
        target = features[feature_key]
        return Nimply(target, random.randint(1, state.rows[target]))

    return evolvable

In [596]:
def make_strategy_evol(genome: dict) -> Callable:
    """Build a strategy driven by three evolvable parameters.

    Args:
        genome: Dictionary with the keys
            ``'binary_chance'`` - probability of playing on the shortest
            row (the longest row is used otherwise);
            ``'%_taken_shortest'`` / ``'%_taken_longest'`` - percentage of
            the selected row that bounds how many objects may be taken.

    Returns:
        A function mapping a board to a move that removes between 1 and the
        computed bound from the selected row.
    """
    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        if random.random() < genome['binary_chance']:
            row, percent = data['shortest_row'], genome['%_taken_shortest']
        else:
            row, percent = data['longest_row'], genome['%_taken_longest']

        available = state.rows[row]
        # Keep the bound inside [1, available]: a percentage above 100 must
        # not produce a move larger than the row itself.
        x = min(available, max(1, int(available * percent / 100)))
        return Nimply(row, random.randint(1, x))

    return evolvable

In [597]:
def completion_strategy_with_min2(genome: dict) -> Callable:
    """Build a strategy that keeps and spends small "safety" rows.

    A safety row is a non-empty row holding at most 2 objects; a row with
    more than 2 objects can be turned into one by leaving exactly 2 in it.

    Args:
        genome: Dictionary with ``'p1'`` (number of safety rows to keep
            available) and ``'p2'`` (completion level below which a safety
            row is used).

    Returns:
        A function mapping a board to a move.
    """
    def evolvable(state: Nim) -> Nimply:
        features = cook_status(state)
        rows = state.rows

        reducible = [idx for idx, size in enumerate(rows) if size > 2]
        reserve = [idx for idx, size in enumerate(rows) if size <= 2 and size != 0]

        if features["activate_rows_number"] == 1:
            # Only one row left: take all of it.
            return Nimply(features["longest_row"], rows[features["longest_row"]])

        if len(reserve) < genome["p1"] and reducible:
            # Not enough safety rows yet: shrink a larger row down to 2.
            pick = random.choice(reducible)
            return Nimply(pick, rows[pick] - 2)

        if features["completion"] < genome["p2"] and reserve:
            # Spend a safety row by taking a single object from it.
            return Nimply(random.choice(reserve), 1)

        # Default: clear the randomly selected row entirely.
        return Nimply(features["random"], rows[features["random"]])

    return evolvable

In [598]:
def E2longestVSshortest_allVS1(genome: dict) -> Callable:
    """Build a strategy with two independent random choices.

    Args:
        genome: Dictionary with ``'p1'`` (probability of playing on the
            longest row instead of the shortest) and ``'p2'`` (probability
            of taking the whole row instead of a single object).

    Returns:
        A function mapping a board to a move.
    """
    def evolvable(state: Nim) -> Nimply:
        features = cook_status(state)
        # First draw selects the row, second draw selects the amount.
        target = features["longest_row"] if random.random() < genome["p1"] else features["shortest_row"]
        amount = state.rows[target] if random.random() < genome["p2"] else 1
        return Nimply(target, amount)

    return evolvable

## Oversimplified match

In [599]:
def play(nim, strategy) -> int:
    """Play one game on ``nim`` and return the index of the winner.

    Args:
        nim: Board; it is mutated through ``nimming`` until it is falsy.
        strategy: Pair of callables; ``strategy[i]`` produces the move of
            player ``i``. Player 0 moves first.

    Returns:
        The index (0 or 1) of the player who made the last move.
    """
    logging.debug(f"status: Initial board -> {nim}")
    player = 0
    while bool(nim):
        logging.debug(f"{nim} {player}")
        nim.nimming(strategy[player](nim))
        logging.debug(f"status: After player {player} -> {nim}")
        # Hand the turn to the other player.
        player = (player + 1) % 2
    # `player` is now whoever would move next, so the other one moved last.
    winner = (player + 1) % 2
    logging.debug(f"status: Player {winner} won!")
    return winner

In [600]:
# Number of games played for each fitness evaluation.
NUM_MATCHES = 100
# Number of rows of the board created for every evaluation game.
NIM_SIZE = 11

def evaluate2(strategy: Callable) -> float:
    """Estimate the win rate of ``strategy`` against ``optimal_strategy``.

    ``NUM_MATCHES`` games are played on fresh boards of ``NIM_SIZE`` rows;
    ``strategy`` is player 0 and always moves first.

    Returns:
        Fraction of games won by ``strategy``.
    """
    opponent = (strategy, optimal_strategy)
    won = 0
    for _ in range(NUM_MATCHES):
        nim = Nim(NIM_SIZE)
        # `play` returns the index of the winner and the evaluated strategy
        # sits at index 0, so only those games count as wins.
        if play(nim, opponent) == 0:
            won += 1
    return won / NUM_MATCHES

def evaluate(strategy: Callable) -> float:
    """Estimate the win rate of ``strategy`` against a fixed opponent.

    The opponent is ``completion_strategy_with_min2`` with ``p1 = 1`` and
    ``p2 = 0.3``. ``NUM_MATCHES`` games are played on fresh boards of
    ``NIM_SIZE`` rows and ``strategy`` always moves first.

    Returns:
        Fraction of games in which ``strategy`` made the last move.
    """
    contenders = (strategy, completion_strategy_with_min2({"p1": 1, "p2":0.3}))
    victories = 0

    for _ in range(NUM_MATCHES):
        board = Nim(NIM_SIZE)
        turn = 0
        while board:
            board.nimming(contenders[turn](board))
            turn = 1 - turn
        # The turn flips after every move, so ending on 1 means that
        # player 0 (the evaluated strategy) moved last.
        if turn == 1:
            victories += 1

    return victories / NUM_MATCHES

In [601]:
# Let INFO messages through on the root logger; DEBUG messages (such as the
# per-move traces emitted by play) stay hidden.
logging.getLogger().setLevel(logging.INFO)

# strategy = (make_strategy({'p': .1}), optimal_strategy)
# nim = Nim(11)

# play(nim, strategy)

## EVOLVED RULES

In [602]:
# Number of individuals created initially and kept after each selection step.
POPULATION_SIZE = 10
# Number of offspring generated per generation.
OFFSPRING_SIZE = 5
# Presumably the number of generations of the evolutionary loop - TODO confirm.
NUM_ITERATIONS = 1000

In [603]:
def init_population() -> list:
    """Create the initial population of evaluated random genomes.

    Returns:
        A list of ``POPULATION_SIZE`` pairs ``(genome, fitness)`` with
        pairwise distinct genomes; the fitness is the value returned by
        ``evaluate`` for the strategy built from the genome.
    """
    population = list()
    while len(population) < POPULATION_SIZE:
        param = {'%_taken_longest': random.randint(0, 100), '%_taken_shortest': random.randint(0, 100), 'binary_chance' : random.random()}
        # Entries are (genome, fitness) pairs, so duplicates have to be
        # looked up among the genomes rather than among the pairs.
        if any(param == existing for existing, _ in population):
            continue
        population.append((param, evaluate(make_strategy_evol(param))))
    return population

def tournament_selection(population) -> dict:
    """Return the genome of one individual picked uniformly at random.

    Despite the name, no fitness comparison takes place: every individual
    has the same chance of being selected.

    Args:
        population: Sequence of entries whose first item is the genome.

    Returns:
        The first item of the selected entry.
    """
    chosen = random.choice(population)
    return chosen[0]

def tweak(parameters, chance_sigma: float = 0.1, percent_sigma: float = 0.1) -> dict:
    """Return a mutated copy of a genome, kept inside its valid ranges.

    Gaussian noise is added to every gene; afterwards ``binary_chance`` is
    clamped to [0, 1] and both percentages to [0, 100], so repeated
    mutation cannot drift into meaningless values.

    Args:
        parameters: Genome with the keys ``'binary_chance'``,
            ``'%_taken_shortest'`` and ``'%_taken_longest'``. Not modified.
        chance_sigma: Standard deviation of the noise on ``binary_chance``.
        percent_sigma: Standard deviation of the noise on both percentages.

    Returns:
        A new dictionary with the three mutated genes.
    """
    def _clamp(value, low, high):
        return max(low, min(high, value))

    new_param = dict()
    new_param["binary_chance"] = _clamp(
        parameters["binary_chance"] + random.gauss(0, chance_sigma), 0.0, 1.0
    )
    new_param["%_taken_shortest"] = _clamp(
        parameters["%_taken_shortest"] + random.gauss(0, percent_sigma), 0, 100
    )
    new_param["%_taken_longest"] = _clamp(
        parameters["%_taken_longest"] + random.gauss(0, percent_sigma), 0, 100
    )
    return new_param

In [604]:
## GENETIC ALGORITHM

In [605]:
# Fixed seed so that a fresh run reproduces the same evolution.
random.seed(42)
population = init_population()

for iteration in range(NUM_ITERATIONS):
    # One pool per generation: it has to be created outside the offspring
    # loop, otherwise only the last offspring would survive.
    offspring_pool = list()
    for offspring_number in range(OFFSPRING_SIZE):
        parameters = tournament_selection(population)
        offspring = tweak(parameters)
        candidate = (offspring, evaluate(make_strategy_evol(offspring)))
        if candidate not in offspring_pool:
            offspring_pool.append(candidate)

    population += offspring_pool
    # Drop exact duplicates while keeping the first occurrence.
    unique_population = list()
    for p in population:
        if p not in unique_population:
            unique_population.append(p)
    population = unique_population
    # Keep only the best POPULATION_SIZE individuals by fitness.
    population.sort(key=lambda x: x[1], reverse=True)
    population = population[:POPULATION_SIZE]
    logging.info(f"Iteration {iteration} : best {population[0][0]} nWin {population[0][1]}")

INFO:root:Iteration 0 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 1 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 2 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 3 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 4 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 5 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 6 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nWin 0.48
INFO:root:Iteration 7 : best {'%_taken_longest': 81, '%_taken_shortest': 14, 'binary_chance': 0.025010755222666936} nW

KeyboardInterrupt: 

In [None]:
# Evaluate one specific genome. Its 'binary_chance' is above 1, so the
# `random.random() < genome['binary_chance']` test in make_strategy_evol is
# always true and the shortest-row branch is taken on every move.
evaluate(make_strategy_evol({'binary_chance': 1.4828148238731833, '%_taken_shortest': 55.9591599630435, '%_taken_longest': 77.16217066865488}))

0.0