Copyright **`(c)`** 2023 Francesca Zafonte `<s319331@studenti.polito.it>`  
[`https://github.com/Zafonte/computational-intelligence`](https://github.com/Zafonte/computational-intelligence)  


# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal repository for the course
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [9]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [10]:
# A move in the game: remove `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [11]:
class Nim:
    """Board of a Nim game.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects. An optional bound
    ``k`` limits how many objects a single move may remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        """Create a board with ``num_rows`` rows holding 1, 3, 5, ... objects.

        ``k`` is the maximum number of objects removable in one move;
        ``None`` means no limit.
        """
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Return the row sizes separated by spaces, wrapped in ``<`` ``>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the row sizes (a new tuple on every access)."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` (``row``, ``num_objects``) to the board.

        Raises AssertionError when the move is illegal: fewer than one object
        removed, more objects than the row holds, or more than ``k``.
        """
        row, num_objects = ply
        # Checks stay as asserts so illegal moves keep raising AssertionError.
        # A move must remove at least one object; without this check a player
        # could pass (0) or even add objects to a row (negative value).
        assert num_objects >= 1, "a move must remove at least one object"
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects
        

## Sample (and silly) strategies

In [12]:
#STRATEGY 1

def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random number of its objects."""
    non_empty = []
    for index, count in enumerate(state.rows):
        if count > 0:
            non_empty.append(index)
    row = random.choice(non_empty)
    available = state.rows[row]
    # Anything from a single object up to the whole row.
    return Nimply(row, random.randint(1, available))


In [13]:
#STRATEGY 2

def gabriele(state: Nim) -> Nimply:
    """Take as many objects as possible from the first row that has any."""
    candidates = []
    for row, count in enumerate(state.rows):
        for amount in range(1, count + 1):
            candidates.append((row, amount))

    def preference(move):
        # Smaller row index ranks higher; within a row, more objects ranks higher.
        return -move[0], move[1]

    best_row, best_amount = max(candidates, key=preference)
    return Nimply(best_row, best_amount)


In [14]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    # NOTE(review): placeholder only. `genome` is assigned but never used and
    # the function falls off the end, so it returns None instead of a Nimply.
    genome = {"love_small": 0.5}


In [15]:
#STRATEGY 3 

import numpy as np


def nim_sum(state: Nim) -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all row sizes.

    The XOR is folded directly over the integers, so it works for row sizes
    of any magnitude and yields 0 for a board with no rows.
    """
    result = 0
    for count in state.rows:
        result ^= int(count)
    return result


def analize(raw: Nim) -> dict:
    """Map every possible move on ``raw`` to the nim-sum of the board it produces.

    Returns a dict with a single key, ``"possible_moves"``, whose value maps
    each ``Nimply`` to the nim-sum obtained after playing it. ``raw`` itself
    is left untouched: every move is tried on a deep copy.
    """
    outcomes = {}
    for row, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            move = Nimply(row, taken)
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Choose a move with the nim-sum strategy for misère Nim.

    In this game the player who takes the last object loses, so a move is
    considered winning when the board it leaves behind

    * has nim-sum 0, as long as some row still holds two or more objects, or
    * consists only of rows with at most one object and an odd number of
      them is non-empty (the opponent is then forced to take the last one).

    One winning move is picked at random; when none exists, a random legal
    move is returned instead.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    rows = state.rows
    winning_moves = []
    for ply, ns in analysis["possible_moves"].items():
        largest_left = max(
            count - ply.num_objects if index == ply.row else count
            for index, count in enumerate(rows)
        )
        if largest_left <= 1:
            # Only single-object rows remain, so the nim-sum equals the parity
            # of their number: 1 means an odd count is left to the opponent.
            is_winning = ns == 1
        else:
            is_winning = ns == 0
        if is_winning:
            winning_moves.append(ply)
    if not winning_moves:
        # Losing position: no move helps, so any legal move will do.
        winning_moves = list(analysis["possible_moves"])
    return random.choice(winning_moves)

STRATEGY 4 - ES

In [None]:
# Number of elementary actions; presumably counts the five helpers a1..a5
# defined below (the constant is not referenced elsewhere in the visible code).
N_ACTION = 5

#it takes one element from a row
def a1(state: Nim) -> Nimply:
    """Take one object from a randomly chosen non-empty row.

    The board is not modified: ``state.rows`` is a read-only tuple, so the
    function only returns the move (row and number of objects to remove).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # The row was chosen among non-empty ones, so removing one is always legal.
    return Nimply(row, 1)

# It takes two elements from a row
def a2(state: Nim) -> Nimply:
    """Take two objects from a randomly chosen non-empty row.

    When the chosen row holds fewer than two objects, the whole row is taken
    instead. The board is not modified: ``state.rows`` is a read-only tuple,
    so the function only returns the move (row and number of objects to
    remove).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Cap the request at what the row actually holds to keep the move legal.
    num_objects = min(2, state.rows[row])
    return Nimply(row, num_objects)

# It takes three elements from a row
def a3(state: Nim) -> Nimply:
    """Take three objects from a randomly chosen non-empty row.

    When the chosen row holds fewer than three objects, the whole row is
    taken instead. The board is not modified: ``state.rows`` is a read-only
    tuple, so the function only returns the move (row and number of objects
    to remove).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Cap the request at what the row actually holds to keep the move legal.
    num_objects = min(3, state.rows[row])
    return Nimply(row, num_objects)

# It takes four elements from a row
def a4(state: Nim) -> Nimply:
    """Take four objects from a randomly chosen non-empty row.

    When the chosen row holds fewer than four objects, the whole row is taken
    instead. The board is not modified: ``state.rows`` is a read-only tuple,
    so the function only returns the move (row and number of objects to
    remove).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Cap the request at what the row actually holds to keep the move legal.
    num_objects = min(4, state.rows[row])
    return Nimply(row, num_objects)

# It takes n-1 elements from a row, where n is the number of objects in the row
def a5(state: Nim) -> Nimply:
    """Take all but one object from a randomly chosen non-empty row.

    A row holding a single object cannot give up zero objects, so in that
    case its only object is taken. The board is not modified: ``state.rows``
    is a read-only tuple, so the function only returns the move (row and
    number of objects to remove).
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # At least one object must be removed for the move to be legal.
    num_objects = max(1, state.rows[row] - 1)
    return Nimply(row, num_objects)


In [None]:
# Presumably the number of games played to evaluate an action; N_PLAYS is not
# referenced anywhere else in the visible code.
N_PLAYS = 20
# Pool of candidate strategies: the three sample strategies plus a1..a5.
actions = [pure_random, gabriele, optimal, a1, a2, a3, a4, a5]

# Intended design (not implemented in the visible code):
# - every action has a weight and a win_percentage;
# - the weight is what the Gaussian mutation changes;
# - the win_percentage is used to find the best strategy.


#Compute the %win with an action 
def fitness(state: action) -> float:
    """Return the win percentage of an action (not implemented yet).

    NOTE(review): neither `action` (used as the parameter annotation) nor
    `win_percentage` is defined in the visible code, so this definition
    cannot run as written. The parameter is named `state` but is presumably
    meant to be an action/strategy — confirm before implementing.
    """
    return win_percentage



# Mutation function: changes the weights of the strategy using a Gaussian mutation
def mutation(state: action, λ: int, σ: float):   
    """Return a normalised weight vector obtained by adding Gaussian noise.

    Zero-mean normal noise with standard deviation ``σ`` is added to
    ``state.weight()`` and the result is divided by its sum.

    NOTE(review): `action` is not defined in the visible code, and `state` is
    assumed to provide `__len__()` and `weight()` — confirm against the
    intended type.
    NOTE(review): the loop overwrites `weight` on every iteration, so only the
    last of the λ draws is returned; with λ == 0 `weight` is never assigned.
    """
    for _ in range(λ):
        # One noise sample per weight, added to the current weights.
        weight = np.random.normal(loc=0, scale=σ, size=(state.__len__())) + state.weight()
        # Rescale so the weights sum to 1. NOTE(review): the noise can make
        # entries negative or the sum close to zero — verify this is intended.
        weight = weight / sum(weight)  
    
    return weight




## Adaptive (μ+λ)-ES

In [None]:
#Code of the prof - to adjust in order to solve my problem
def evolutionary_strategy(state: Nim) -> Nimply:
    """Template of a self-adaptive evolution strategy (work in progress).

    Each individual is a row of ``population``: all columns but the last are
    the values being optimised, the last column is that individual's own
    mutation step σ. Every generation λ parents are sampled, their σ and
    values are mutated, and the μ offspring with the highest fitness become
    the next population.

    NOTE(review): as written this cannot run and never uses ``state`` nor
    returns a ``Nimply``: `σ`, `N_DIM`, `tqdm` and `rastrigin` are not defined
    in the visible code, and the final `return best` sits outside the function
    body and refers to an undefined `best`.
    """
    μ = 2
    λ = 5
    # NOTE(review): `sigma` is never read; the code below uses `σ` instead.
    sigma = 1

    # NOTE(review): `weights` is never used.
    weights = []

    # μ individuals, each with N_DIM values plus one trailing σ column.
    population = np.random.random((μ, N_DIM + 1))
    # Rescale the values from [0, 1) to [-5.12, 5.12).
    population[:, :-1] = population[:, :-1] * 5.12 * 2 - 5.12
    population[:, -1] *= σ

    best_fitness = None
    history = list()
    for step in tqdm(range(1_000_000 // λ)):
        # offspring <- select λ random points from the population of μ
        offspring = population[np.random.randint(0, μ, size=(λ,))]
        # mutate all σ (last column) and replace negative values with a small number
        offspring[:, -1] = np.random.normal(
            loc=offspring[:, -1], scale=0.2
        )
        offspring[offspring[:, -1] < 1e-5, -1] = 1e-5
        # mutate all v (all columns but the last), using the σ in the last column
        offspring[:, 0:-1] = np.random.normal(
            loc=offspring[:, 0:-1], scale=offspring[:, -1].reshape(-1, 1)
        )
        # Evaluation = computing the fitness of an individual (an action)
        # evaluate the offspring and sort them by ascending fitness
        # NOTE(review): this local `fitness` shadows the module-level function
        # of the same name.
        fitness = rastrigin(offspring[:, 0:-1])
        offspring = offspring[fitness.argsort()]
        # save best (just for the plot)
        if best_fitness is None or best_fitness < np.max(fitness):
            best_fitness = np.max(fitness)
            history.append((step, best_fitness))
        # select the μ with max fitness and discard fitness
        # (survivors are taken from the offspring only, not from the parents)
        population = np.copy(offspring[-μ:])

    fitness = rastrigin(population[:, 0:-1])
    logging.info(
        f"Best solution: {fitness.max()} (with σ={population[fitness.argmax(), -1]:0.3g})"
    )

# NOTE(review): this `return` is at module level (not inside the function
# above) and `best` is never defined.
return best



In [None]:
#history = np.array(history)
#plt.figure(figsize=(14, 4))
#plt.plot(history[:, 0], history[:, 1], marker=".")

## Oversimplified match

In [16]:
# Show INFO-level messages so every move of the match is logged.
logging.getLogger().setLevel(logging.INFO)

# The two competing strategies: index 0 moves first, index 1 second.
strategy = (evolutionary_strategy, pure_random)

nim = Nim(5)
logging.info(f"init : {nim}")
player = 0
# Keep playing while at least one object is left on the board.
while nim:
    ply = strategy[player](nim)
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply)
    logging.info(f"status: {nim}")
    # Hand the turn to the other player (0 <-> 1).
    player = 1 - player
# `player` was flipped after the final move, so it now names the player who
# did NOT take the last object; that player is reported as the winner.
logging.info(f"status: Player {player} won!")



INFO:root:init : <1 3 5 7 9>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=6)
INFO:root:status: <1 3 5 7 3>
INFO:root:ply: player 1 plays Nimply(row=0, num_objects=1)
INFO:root:status: <0 3 5 7 3>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=5)
INFO:root:status: <0 3 5 2 3>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=2)
INFO:root:status: <0 1 5 2 3>
INFO:root:ply: player 0 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 1 5 1 3>
INFO:root:ply: player 1 plays Nimply(row=3, num_objects=1)
INFO:root:status: <0 1 5 0 3>
INFO:root:ply: player 0 plays Nimply(row=4, num_objects=3)
INFO:root:status: <0 1 5 0 0>
INFO:root:ply: player 1 plays Nimply(row=1, num_objects=1)
INFO:root:status: <0 0 5 0 0>
INFO:root:ply: player 0 plays Nimply(row=2, num_objects=2)
INFO:root:status: <0 0 3 0 0>
INFO:root:ply: player 1 plays Nimply(row=2, num_objects=3)
INFO:root:status: <0 0 0 0 0>
INFO:root:status: Player 0 won!
