Copyright **`(c)`** 2023 Zafonte Francesca `<s319331@studenti.polito.it>`  
[`https://github.com/Zafonte/computational-intelligence`](https://github.com/Zafonte/computational-intelligence)  

# Lab 2: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal course repository for the course 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [61]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import deepcopy
from matplotlib import pyplot as plt


## The *Nim* and *Nimply* classes

In [62]:
Nimply = namedtuple("Nimply", "row, num_objects") #


In [63]:
class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None: 
        self._rows = [i * 2 + 1 for i in range(num_rows)] 
        self._k = k 

    def __bool__(self): 
        return sum(self._rows) > 0

    def __str__(self): 
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple: 
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply 
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k 
        self._rows[row] -= num_objects


## Sample (and silly) startegies 

In [64]:
#STRATEGIA 1

def pure_random(state: Nim) -> Nimply:
    """A completely random move"""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects) 




In [65]:
#STRATEGIA 2

def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row"""
    possible_moves = [(r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1)]
    return Nimply(*max(possible_moves, key=lambda m: (-m[0], m[1])))


In [66]:
def adaptive(state: Nim) -> Nimply:
    """A strategy that can adapt its parameters"""
    genome = {"love_small": 0.5}


In [67]:
#STRATEGIA 3 

import numpy as np


def nim_sum(state: Nim) -> int:
    tmp = np.array([tuple(int(x) for x in f"{c:032b}") for c in state.rows])
    xor = tmp.sum(axis=0) % 2
    return int("".join(str(_) for _ in xor), base=2)


def analize(raw: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = dict() 
    for ply in (Nimply(r, o) for r, c in enumerate(raw.rows) for o in range(1, c + 1)): 
        tmp = deepcopy(raw)
        tmp.nimming(ply)
        cooked["possible_moves"][ply] = nim_sum(tmp) 
    return cooked


def optimal(state: Nim) -> Nimply:
    analysis = analize(state) 
    logging.debug(f"analysis:\n{pformat(analysis)}")
    spicy_moves = [ply for ply, ns in analysis["possible_moves"].items() if ns != 0] 
    if not spicy_moves:
        spicy_moves = list(analysis["possible_moves"].keys())
    ply = random.choice(spicy_moves)
    return ply

In [68]:
#Others STRATEGIES for ES

#it takes one element from a row
def a1(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = 1
    if state.rows[row] >= num_objects:
        state.rows[row] -= num_objects
    return Nimply(row, state.rows[row]) 

#it take two element from a row
def a2(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = 2
    if state.rows[row] >= num_objects:
        state.rows[row] -= num_objects
    return Nimply(row, state.rows[row]) 

#it take three element from a row
def a3(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = 3
    if state.rows[row] >= num_objects:
        state.rows[row] -= num_objects
    return Nimply(row, state.rows[row]) 

#it take four element from a row
def a4(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = 4
    if state.rows[row] >= num_objects:
        state.rows[row] -= num_objects
    return Nimply(row, state.rows[row]) 

#it take n-1 element from a row where n is the lenght of the row
def a5(state: Nim) -> Nimply:
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = state.rows[row] - 1
    if state.rows[row] >= num_objects:
        state.rows[row] -= num_objects
    return Nimply(row, state.rows[row]) 


In [69]:
class Strategy:
    def __init__(self, name, weight, win_percentage) -> None:
        self._name = name
        self._weight = weight
        self._win_percentage = win_percentage
    
    def __len__(self) -> int:
        return len(self._weight)

    def get_name(self):
        return self._name

    def get_weight(self) -> float:
        return self._weight

    def set_weight(self, weight: float) -> None:
        self._weight = weight

    def set_win_percentage(self, win_percentage: float) -> None:
        self._win_percentage = win_percentage

In [70]:
weight = 1/5 #I set a proportional weight, at the start
win_percentage = 0.0

#I define a strategy how an action with weight and win_percentage
strategy1 = Strategy(a1, weight, win_percentage)
strategy2 = Strategy(a2, weight, win_percentage)
strategy3 = Strategy(a3, weight, win_percentage)
strategy4 = Strategy(a4, weight, win_percentage)
strategy5 = Strategy(a5, weight, win_percentage)


In [71]:
N_PLAYS = 5
NIM_SIZE = 5

#Every action has a weight and a win_percentage
#I will use the weight to do the Gaussian Mutation 
#I will use the win_percentage to find the best strategy


#Compute the %win with an action.
def fitness(action: callable) -> float:
    strategies = [action, pure_random]
    won = 0

    for i in range(N_PLAYS):  
        nimGame = Nim(NIM_SIZE) #rows of the game
        logging.info(f"init : {nim}") 
        player = 0
        while nimGame:
            ply = strategies[player](nimGame) 
            logging.info(f"ply: player {player} plays {ply}")
            nimGame.nimming(ply) 
            logging.info(f"status: {nimGame}")
            player = 1 - player 
        if player == 0:
            won += 1
            
    logging.info(f"status: Player 0 won {won} match with {action} strategy!")
    win_percentage = won / N_PLAYS
    
    if action == a1:
        strategy1.set_win_percentage(win_percentage)
        return strategy1._win_percentage
    if action == a2:
        strategy2.set_win_percentage(win_percentage)
        return strategy2._win_percentage
    if action == a3:
        strategy3.set_win_percentage(win_percentage)
        return strategy3._win_percentage
    if action == a4:
        strategy4.set_win_percentage(win_percentage)
        return strategy4._win_percentage
    if action == a5:
        strategy5.set_win_percentage(win_percentage)
        return strategy5._win_percentage



#I define the Mutation function that change the weight of the strategy using the fitness function and a Gaussian Mutation
#if the percentage of win is low -> small sigma 
#otherwise -> large sigma
def mutation(action: callable, sigma: float) -> callable:  
    if action == a1:
        if strategy1._win_percentage < 0.5:  
            weight = strategy1.get_weight() + np.random.normal(loc=0, scale=sigma, size=(strategy1.__len__())) 
            strategy1.set_weight(weight)
        else:
            weight = strategy1.get_weight() + np.random.normal(loc=0, scale=sigma+1, size=(strategy1.__len__())) 
            strategy1.set_weight(weight)
    
    if action == a2:
        if strategy2._win_percentage < 0.5:  
            weight = strategy2.get_weight() + np.random.normal(loc=0, scale=sigma, size=(strategy2.__len__())) 
            strategy2.set_weight(weight)
        else:
            weight = strategy1.get_weight() + np.random.normal(loc=0, scale=sigma+1, size=(strategy2.__len__())) 
            strategy2.set_weight(weight)
    
    if action == a3:
        if strategy3._win_percentage < 0.5:  
            weight = strategy3.get_weight() + np.random.normal(loc=0, scale=sigma, size=(strategy3.__len__())) 
            strategy3.set_weight(weight)
        else:
            weight = strategy3.get_weight() + np.random.normal(loc=0, scale=sigma+1, size=(strategy3.__len__())) 
            strategy3.set_weight(weight)

    if action == a4:
        if strategy4._win_percentage < 0.5:  
            weight = strategy4.get_weight() + np.random.normal(loc=0, scale=sigma, size=(strategy4.__len__())) 
            strategy4.set_weight(weight)
        else:
            weight = strategy4.get_weight() + np.random.normal(loc=0, scale=sigma+1, size=(strategy4.__len__())) 
            strategy4.set_weight(weight)
    
    if action == a5:
        if strategy5._win_percentage < 0.5:  
            weight = strategy5.get_weight() + np.random.normal(loc=0, scale=sigma, size=(strategy5.__len__())) 
            strategy5.set_weight(weight)
        else:
            weight = strategy5.get_weight() + np.random.normal(loc=0, scale=sigma+1, size=(strategy5.__len__())) 
            strategy5.set_weight(weight)
    
    return action
     
   
    

## Adaptive (μ+λ)-ES

In [72]:
def evolutionary_strategy(state: Nim) -> callable:
    μ = 2 #number of parents selected
    λ = 5 #number of childred generated by the parents
    sigma = 0.001

    #build initial population
    population = [a1, a2, a3, a4, a5]
    win = [] #cointains the percentage of win of other strategy
        
    best_fitness = None
  
    for step in range(100 // λ): 
        #for each individual in population 
        for i in population:
            #Evaluation = computing the fitness of an individual 
            win_percentage = fitness(i)
            win.append(win_percentage)
            if best_fitness is None or win_percentage > fitness(best_fitness):
                best_fitness = i

        #order the population in function of the %win [max..min]
        population =  population[win.argsort()][::-1]    
        #select the μ with max fitness and discard fitness - Truncation Selection
        #q = number of parent which survive
        q = population[:μ]

        #join operation that is the only difference with (μ, λ)
        population = q.copy()
        #for each individual_selected = individual whose fitness are gratest
        for z in q:
            for _ in range(λ//μ):
                mutated_individual = mutation(np.copy(z), sigma)  # Use np.copy for array, remove if z is an integer
                population.append(mutated_individual)

    player = best_fitness(state)
    return player


## Adaptive (μ, λ)-ES

In [73]:
def evolutionary_strategy_alwaysReplace(state: Nim) -> callable:
    μ = 2 #number of parents selected
    λ = 5 #number of childred generated by the parents
    sigma = 0.001

    #build initial population
    population = [a1, a2, a3, a4, a5]
    win = [] #cointains the percentage of win of other strategy
        
    best_fitness = None
  
    for step in range(100 // λ): 
        #for each individual in population 
        for i in population:
            #Evaluation = computing the fitness of an individual 
            win_percentage = fitness(i)
            win.append(win_percentage)
            if best_fitness is None or win_percentage > fitness(best_fitness):
                best_fitness = i

        #order the population in function of the %win [max..min]
        population =  population[win.argsort()][::-1]    
        #select the μ with max fitness and discard fitness - Truncation Selection
        #q = number of parent which survive
        q = population[:μ]

        #join operation that is done by just replacing the population with the childern
        population = []
        #for each individual_selected = individual whose fitness are gratest
        for z in q:
            for _ in range(λ//μ):
                mutated_individual = mutation(np.copy(z), sigma)  # Use np.copy for array, remove if z is an integer
                population.append(mutated_individual)

    player = best_fitness(state)
    return player

## Oversimplified match

In [74]:
logging.getLogger().setLevel(logging.INFO) 

es = evolutionary_strategy
strategies = [es, pure_random]

nim = Nim(5) #5 file
logging.info(f"init : {nim}") 
player = 0

while nim:
    ply = strategies[player](nim) 
    logging.info(f"ply: player {player} plays {ply}")
    nim.nimming(ply) 
    logging.info(f"status: {nim}")
    player = 1 - player 

logging.info(f"status: Player {player} won!")




INFO:root:init : <1 3 5 7 9>
INFO:root:init : <1 3 5 7 9>


TypeError: 'tuple' object does not support item assignment