Copyright **`(c)`** 2023 Ivan Magistro Contenta `<s314356@studenti.polito.it>`  
[`https://github.com/ivanmag22/computational-intelligence`](https://github.com/ivanmag22/computational-intelligence)


# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [980]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy


## The *Nim* and *Nimply* classes

In [981]:
Nimply = namedtuple("Nimply", "row, num_objects")


In [982]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"
    
    def __len__(self):
        return len(self._rows)

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [983]:
def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)


In [984]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [985]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [986]:
import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict()
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)):
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0]
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply


## Evolutionary Strategies

In [987]:
N_RULES = 4

def r0(state: Nim) -> Nimply:   # it takes only objects from even rows (0,2,4,...)
    rows = list(state.rows)

    move = Nimply(0, 0)
    for j in [i for i in range(state.__len__()) if i%2==0]:
        if rows[j] > 0:
            if (state._k is not None and rows[j] <= state._k) or state._k is None:
                move = Nimply(j, rows[j])
            elif rows[j] > state._k and state._k is not None:
                move = Nimply(j, state._k)
            break

    return move

def r1(state: Nim) -> Nimply:   # it takes all the objects of a whole row (no one object has been taken and the number of objects is less or equal to k, if it is fixed)
    rows = list(state.rows)

    move = Nimply(0, 0)
    for i in range(state.__len__()):
        if (state._k is not None and state._k <= i*2+1) or state._k is None:
            if rows[i] == i*2+1:
                move = Nimply(i, i*2+1)
                break
        else:
            break

    return move

def r2(state: Nim) -> Nimply:   # it takes only one object from a row
    rows = list(enumerate(list(state.rows)))    # it stores the correspondence between the actual row and its number of available objects
    random.shuffle(rows)    # it shuffles the rows in order to pick one object randomly from a row, but we maintain the information of the actual row

    move = Nimply(0, 0)
    for i in range(state.__len__()):
        if rows[i][1] > 0:
            move = Nimply(rows[i][0], 1)
            break

    return move

def r3(state: Nim) -> Nimply:   # it takes objects from the last available row
    rows = list(state.rows)

    move = Nimply(0, 0)
    for i in [state.__len__()-j-1 for j in range(state.__len__())]:
        if (state._k is not None and state._k <= i*2+1) or state._k is None:
            if rows[i] > 0:
                if rows[i] > 1:
                    move = Nimply(i, random.randint(1, rows[i]))
                else:
                    move = Nimply(i, 1)
                break
        else:
            break

    return move

In [988]:
# Class to change the weight of each rule and to memorize the accuracy
class NimGame:
    def __init__(self, weights: list[float]) -> None:
        self._weights = weights
        self._dict = {
            0: r0,
            1: r1,
            2: r2,
            3: r3,
        }
        self._acc = 0

    def __len__(self) -> int:
        return len(self._weights)

    def get_rule_by_index(self, index: int):
        assert index>=0 and index<self.__len__()
        return self._dict[index]

    def get_weights(self) -> list[float]:
        return self._weights
    
    def get_accuracy(self) -> float:
        return self._acc
    
    def set_accuracy(self, acc: float) -> None:
        self._acc = acc


In [989]:
def fitness(game: NimGame):
    return game.get_accuracy()

In [990]:
def mutation(state: NimGame, λ: int, σ: float):   # it changes the weights
    offsprings = []
    for _ in range(λ):
        new_state = deepcopy(state) # NimGame object
        ll = new_state.__len__()

        weights = np.random.normal(loc=0, scale=σ, size=(ll)) + new_state.get_weights()
        weights = weights / sum(weights)    # in order that the sum of the weights is always 1.0 (sum of the probabilities should be 1)

        offsprings.append(NimGame(weights))
    
    return offsprings

In [991]:
def es_wrapper(evol: NimGame):  # wrapper that initialites the weighted player
    weights = evol.get_weights()

    def weighted(state: Nim) -> Nimply:  # player with weighted strategy: at each step it picks the "heaviest" move (if it is activated)
        
        values = []
        moves = []
        for i in range(evol.__len__()):
            new_state = deepcopy(state)
            rule = evol.get_rule_by_index(i)
            move = rule(new_state)
            moves.append(move)
            if move == Nimply(0, 0):
                values.append(0)
            else:
                values.append(1)

        choice = 0
        best = 0
        for i in range(evol.__len__()):
            if weights[i] * values[i] > best:
                best = weights[i] * values[i]
                choice = i
            elif weights[i] * values[i] == best:
                choice = random.choice([i, choice])
        return moves[choice]
    
    return weighted

In [992]:
N_GAMES = 30

def play_games(state: Nim, games: list[NimGame]) -> float:
    acc = 0

    for game in games:
        challenger = es_wrapper(game)
        opponent = [pure_random, gabriele, optimal]

        for x in opponent:  # total number of matches: #opponents * N_GAMES
            strategy = (challenger, x)
            wins = 0

            for _ in range(N_GAMES):    # N_GAMES against each opponent
                new_state = deepcopy(state)
                player = 0
                while new_state:
                    ply = strategy[player](new_state)
                    new_state.nimming(ply)
                    player = 1 - player
                if player == 0:
                    wins += 1
        
            acc += (wins/N_GAMES)/3

        game.set_accuracy(acc)
        
        acc = 0

    return

In [993]:
N_GAMES = 90

def play_games_with_optimal(state: Nim, games: list[NimGame]) -> float:
    acc = 0

    for game in games:
        challenger = es_wrapper(game)
        opponent = optimal

        strategy = (challenger, opponent)
        wins = 0

        for _ in range(N_GAMES):    # N_GAMES against each opponent
            new_state = deepcopy(state)
            player = 0
            while new_state:
                ply = strategy[player](new_state)
                new_state.nimming(ply)
                player = 1 - player
            if player == 0:
                wins += 1
        
        acc += (wins/N_GAMES)

        game.set_accuracy(acc)

    return

In [994]:
play = play_games

# (1 + λ)-ES

In [995]:
def evolutionary_plus(state: Nim) -> NimGame:
    λ = 20
    sigma = 0.001
    
    solution = NimGame([1/N_RULES for _ in range(N_RULES)]) # I have to put a list of probability as parameter
    best_so_far = deepcopy(solution)

    stats = [0, 0]
    for step in range(100 // λ):
        offspring = [ best_so_far ] # we are searching individuals around the best one

        play(state, [ solution ])   # parent's fitness

        offspring = offspring + mutation(solution, λ, sigma)
        
        # evaluate and select best
        evals = []
        play(state, offspring)
        for x in offspring:
            evals.append(fitness(x))

        # rule one-out-of-five: to modify mutation step
        stats[0] += λ
        stats[1] += sum(x > fitness(solution) for x in evals)
        if (step + 1) % 200 == 0:
            if stats[0] / stats[1] < 1 / 5:
                sigma /= 1.1
            elif stats[0] / stats[1] > 1 / 5:
                sigma *= 1.1
            stats = [0, 0]

        solution = offspring[np.argmax(evals)]
        if fitness(best_so_far) < fitness(solution):
            best_so_far = deepcopy(solution)
    
    return best_so_far

# (1, λ)-ES

In [996]:
def evolutionary_comma(state: Nim) -> NimGame:
    λ = 20
    sigma = 0.001
    
    solution = NimGame([1/N_RULES for _ in range(N_RULES)]) # I have to put a list of probability as parameter
    best_so_far = deepcopy(solution)

    stats = [0, 0]
    for step in range(1_000 // λ):
        offspring = [ ]

        play(state, [ solution ])   # parent's fitness  # TO REVIEW

        offspring = offspring + mutation(solution, λ, sigma)
        
        # evaluate and select best
        evals = []
        play(state, offspring)
        for x in offspring:
            evals.append(fitness(x)) 

        # rule one-out-of-five: to modify mutation step
        stats[0] += λ
        stats[1] += sum(x > fitness(solution) for x in evals)
        if (step + 1) % 200 == 0:
            if stats[0] / stats[1] < 1 / 5:
                sigma /= 1.1
            elif stats[0] / stats[1] > 1 / 5:
                sigma *= 1.1
            stats = [0, 0]

        solution = offspring[np.argmax(evals)]
        if fitness(best_so_far) < fitness(solution):
            best_so_far = deepcopy(solution)
    
    return best_so_far

# (μ + λ)-ES

In [997]:
def evolutionary_plus_mu(state: Nim) -> NimGame:
    λ = 20
    μ = 5
    sigma = 0.001
    
    solution = NimGame([1/N_RULES for _ in range(N_RULES)]) # I have to put a list of probability as parameter
    best_so_far = deepcopy(solution)

    parents = mutation(solution, μ, sigma)

    stats = [0, 0]
    for step in range(1_000 // λ):
        offspring = parents

        play(state, parents)   # parent's fitness

        offspring = offspring + mutation(solution, λ, sigma)    # μ + λ
        
        # evaluate and select best
        evals = []
        play(state, offspring)
        for x in offspring:
            evals.append(fitness(x))

        # rule one-out-of-five: to modify mutation step
        stats[0] += λ
        stats[1] += sum(x > fitness(parents[i]) for x in evals for i in range(len(parents)))
        if (step + 1) % 200 == 0:
            if stats[0] / stats[1] < 1 / 5:
                sigma /= 1.1
            elif stats[0] / stats[1] > 1 / 5:
                sigma *= 1.1
            stats = [0, 0]

        solution = offspring[np.argmax(evals)]
        if fitness(best_so_far) < fitness(solution):
            best_so_far = deepcopy(solution)
        
        # parents of the next generation: some of them will be the best and the other(s) is (are) one (a few) from the worsts
        offspring.sort(reverse=True, key=fitness)
        split = [ offspring[:len(offspring)//5], offspring[len(offspring)//5:]]
        parents = random.sample(split[0], μ*4//5) + random.sample(split[1], μ//5)
    
    return best_so_far

# (μ, λ)-ES

In [998]:
def evolutionary_comma_mu(state: Nim) -> NimGame:
    λ = 20
    μ = 5
    sigma = 0.001
    
    solution = NimGame([1/N_RULES for _ in range(N_RULES)]) # I have to put a list of probability as parameter
    best_so_far = deepcopy(solution)

    parents = mutation(solution, μ, sigma)

    stats = [0, 0]
    for step in range(1_000 // λ):

        play(state, parents)   # parent's fitness

        offspring = mutation(solution, λ, sigma)    # λ
        
        # evaluate and select best
        evals = []
        play(state, offspring)
        for x in offspring:
            evals.append(fitness(x))

        # rule one-out-of-five: to modify mutation step
        stats[0] += λ
        stats[1] += sum(x > fitness(parents[i]) for x in evals for i in range(len(parents)))
        if (step + 1) % 200 == 0:
            if stats[0] / stats[1] < 1 / 5:
                sigma /= 1.1
            elif stats[0] / stats[1] > 1 / 5:
                sigma *= 1.1
            stats = [0, 0]

        solution = offspring[np.argmax(evals)]
        if fitness(best_so_far) < fitness(solution):
            best_so_far = deepcopy(solution)
        
        # parents of the next generation: some of them will be the best and the other(s) is (are) one (a few) from the worsts
        offspring.sort(reverse=True, key=fitness)
        split = [ offspring[:len(offspring)//5], offspring[len(offspring)//5:]]
        parents = random.sample(split[0], μ*4//5) + random.sample(split[1], μ//5)
    
    return best_so_far

## Oversimplified match

In [999]:
logging.getLogger().setLevel(logging.INFO)

nim = Nim(5)

evolutionary = evolutionary_comma_mu # evolutionary_plus, evolutionary_comma, evolutionary_plus_mu, evolutionary_comma_mu

es_game = evolutionary(nim)
print("ES-accuracy: ", es_game.get_accuracy())
print("ES-weights: ", es_game.get_weights())
es_player = es_wrapper(es_game)
players = [es_player, optimal]

x = 0
wins = 0
for _ in range(N_GAMES):
    strategy = (players[x], players[1-x])   # one time es_player plays as the first, the successive time as the second, and so on
    player = 0
    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        player = 1 - player
    if players[player] is es_player:
        wins += 1
    if x == 0:
        x = 1
    else:
        x = 0
    
print("Accuracy with optimal opponent: ", wins/N_GAMES)


ES-accuracy:  0.6851851851851852
ES-weights:  [0.24261805 0.25146089 0.25902933 0.24689174]
Accuracy with optimal opponent:  0.9888888888888889
