# Lab 3: ES

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The goal of the game is to **avoid** taking the last object.

* Task2.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task2.2: An agent using evolved rules using ES

## Instructions

* Create the directory `lab2` inside your personal repository for the course
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) in that directory

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.



In [938]:
import logging
from pprint import pprint, pformat
from collections import namedtuple
import random
from copy import *


## The *Nim* and *Nimply* classes

In [939]:
# A single move: the index of the row to play on and how many objects to take.
Nimply = namedtuple("Nimply", ["row", "num_objects"])


In [940]:
class Nim:
    """Board of a Nim game: a list of rows, each holding a number of objects.

    Row ``i`` starts with ``2 * i + 1`` objects (1, 3, 5, ...). ``k`` is an
    optional upper bound on how many objects a single move may remove;
    ``None`` means there is no bound.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        # The board is truthy while at least one object is left to take.
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the current row sizes."""
        return tuple(self._rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply`` (a ``(row, num_objects)`` pair) in place.

        Raises:
            ValueError: if ``num_objects`` is smaller than 1, larger than the
                number of objects left in the row, or larger than ``k``.
            IndexError: if ``row`` is outside the board.
        """
        row, num_objects = ply
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if num_objects < 1:
            # A zero/negative take would leave the board unchanged or grow the row.
            raise ValueError(f"must remove at least one object, got {num_objects}")
        if self._rows[row] < num_objects:
            raise ValueError(
                f"row {row} holds {self._rows[row]} objects, cannot remove {num_objects}"
            )
        if self._k is not None and num_objects > self._k:
            raise ValueError(
                f"cannot remove {num_objects} objects, the limit per move is {self._k}"
            )
        self._rows[row] -= num_objects


## Sample (and silly) strategies

In [941]:
import numpy as np


def nim_sum(state: "Nim") -> int:
    """Return the nim-sum (bitwise XOR) of all row sizes of ``state``.

    Only ``state.rows`` is read. A board without rows has nim-sum 0.
    """
    # XOR-ing the integers equals summing their binary digits column by
    # column modulo 2, but works for any magnitude and for an empty board,
    # and avoids building a bit matrix on every call.
    result = 0
    for count in state.rows:
        result ^= count
    return int(result)


def analize(raw: Nim) -> dict:
    """Map every candidate move on ``raw`` to the nim-sum of the resulting board.

    Candidate moves are all ``Nimply(row, n)`` with ``1 <= n <= size of row``.
    The result is a dict with the single key ``"possible_moves"``, whose value
    maps each move to the nim-sum obtained after playing it.
    """
    outcomes = {}
    for row_index, count in enumerate(raw.rows):
        for taken in range(1, count + 1):
            move = Nimply(row_index, taken)
            # Play the move on a copy so the caller's board is left untouched.
            board = deepcopy(raw)
            board.nimming(move)
            outcomes[move] = nim_sum(board)
    return {"possible_moves": outcomes}


def optimal(state: Nim) -> Nimply:
    """Pick a random move among those that leave a non-zero nim-sum.

    When no such move exists, any candidate move may be returned.
    """
    # NOTE(review): nim-sum based play usually aims at leaving a ZERO nim-sum
    # to the opponent; this function prefers non-zero ones — confirm intent.
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")
    outcomes = analysis["possible_moves"]
    candidates = [move for move, value in outcomes.items() if value != 0]
    if not candidates:
        candidates = list(outcomes)
    return random.choice(candidates)


In [942]:
# Hyper-parameters of the evolutionary run.
POPULATION_SIZE = 30
GENE_SIZE = 5  # one weight per entry of STRATEGIES
GENERATIONS = 10
# Half of the population, rounded down to an even number so parents pair up.
OFFSPRING_SIZE = (POPULATION_SIZE // 2) // 2 * 2
MUTATION_PROBABILITY = 0.15

In [943]:
def pure_random_mode(state: Nim) -> Nimply:
    """Choose a non-empty row at random, then a random amount to take from it."""
    non_empty = [index for index, count in enumerate(state.rows) if count > 0]
    chosen = random.choice(non_empty)
    return Nimply(chosen, random.randint(1, state.rows[chosen]))


def gabriele_mode(state: Nim) -> Nimply:
    """Empty the first (lowest-index) row that still holds objects."""
    non_empty = [(row, count) for row, count in enumerate(state.rows) if count > 0]
    # Largest negated index == smallest row index.
    row, count = max(non_empty, key=lambda entry: -entry[0])
    return Nimply(row, count)


def conservative_mode(state):
    """Take a single object from the first (lowest-index) non-empty row."""
    single_takes = [(row, 1) for row, count in enumerate(state.rows) if count > 0]
    # Tuples compare by row index first, so min() yields the lowest row.
    return Nimply(*min(single_takes))


def balance_rows_mode(state: Nim) -> Nimply:
    """Balance the number of elements between rows.

    Takes the first row holding more objects than the floored average row
    size and trims it down to that average. When no row exceeds the average,
    a random move is played instead.
    """
    target_avg = sum(state.rows) // len(state.rows)

    # Return straight from the loop: testing the found index for truthiness
    # would wrongly discard row 0, which is a perfectly valid row to trim.
    for row, count in enumerate(state.rows):
        if count > target_avg:
            return Nimply(row, count - target_avg)

    # No row is above the average: make a random move.
    return pure_random_mode(state)


def limited_move_mode(state: Nim, max_elements: int = 3) -> Nimply:
    """Random move on a random non-empty row, capped at ``max_elements`` objects."""
    non_empty = [index for index, count in enumerate(state.rows) if count > 0]
    row = random.choice(non_empty)
    drawn = random.randint(1, state.rows[row])
    # Clamp the drawn amount to the cap.
    capped = drawn if drawn <= max_elements else max_elements
    return Nimply(row, capped)



In [944]:
# Pool of move-selection strategies; the order matches the weights of a genotype.
STRATEGIES = [
    pure_random_mode,
    gabriele_mode,
    conservative_mode,
    balance_rows_mode,
    limited_move_mode,
]

In [945]:
class NimAgent:
    """Individual of the population: a vector of strategy weights plus a score."""

    def __init__(self):
        self.genotype = self.initialize_gen()
        self.score = 0

    def initialize_gen(self):
        """Return ``GENE_SIZE`` random weights, normalised to sum to 1, as a list."""
        weights = np.random.rand(GENE_SIZE)
        # Normalisation: divide by the total so the weights form a distribution.
        weights = weights / weights.sum()
        return list(weights)
    

In [946]:
population = [NimAgent() for _ in range(POPULATION_SIZE)]  # Create a list of NimAgent objects

def print_population(population):
    """Print one line per agent: 1-based position, genotype and score."""
    for position, member in enumerate(population, start=1):
        print(f"Agent {position} - Genotype: {member.genotype}, Score: {member.score}")

In [947]:
# Draw a strategy according to the strategy probabilities assigned to an individual
def get_strategy(weights):
    """Return one entry of ``STRATEGIES`` sampled with probabilities ``weights``."""
    return np.random.choice(STRATEGIES, p=weights)


In [948]:
def fitness(p):
    """Turn raw win counts into win rates, in place, and return ``p``.

    In the round-robin tournament every agent meets each other agent twice
    (once moving first, once moving second), i.e. ``2 * (len(p) - 1)`` games.
    The divisor is derived from the population actually passed in rather than
    from a module-level constant, so the result stays a true win rate for any
    population size. Populations with fewer than two agents are returned
    unchanged, since no game can have been played.
    """
    games_played = (len(p) - 1) * 2
    if games_played <= 0:
        return p
    for agent in p:
        agent.score = agent.score / games_played
    return p

In [949]:
def evaluate(population, num_rows=5):
    """Score every agent with a double round-robin tournament.

    Each ordered pair ``(i, j)`` with ``i != j`` plays one game on a fresh
    ``Nim(num_rows)`` board with agent ``i`` moving first, so every pair meets
    twice with the roles swapped. The winner's ``score`` is incremented, and
    the scores are finally normalised by ``fitness``.

    Args:
        population: list of agents exposing ``genotype`` and ``score``.
        num_rows: number of rows of the board used for every game.

    Returns:
        Whatever ``fitness(population)`` returns.
    """
    # Iterate over the population actually received rather than a global size.
    for i, first in enumerate(population):
        for j, second in enumerate(population):
            if i == j:
                continue
            winner = _play_match(first.genotype, second.genotype, num_rows)
            if winner == 0:
                first.score += 1
            else:
                second.score += 1
    return fitness(population)


def _play_match(first_genotype, second_genotype, num_rows):
    """Play one game between two genotypes; return 0 if the first mover wins, else 1."""
    genotypes = (first_genotype, second_genotype)
    nim = Nim(num_rows)
    player = 0
    while nim:
        strategy = get_strategy(genotypes[player])
        nim.nimming(strategy(nim))
        player = 1 - player
    # The turn was handed over after the last take, so `player` is the one who
    # did NOT remove the last object: the winner, since taking it loses.
    return player

            


In [950]:
def crossover_agents(agents):
    """Mate consecutive agents pairwise and return all their children.

    Agents are paired as (0, 1), (2, 3), ...; each pair yields two children.
    A trailing agent without a partner produces no offspring.
    """
    offspring = []
    for parent1, parent2 in zip(agents[0::2], agents[1::2]):
        child_genotypes = crossover(parent1.genotype, parent2.genotype)
        for child_genotype in child_genotypes:
            # Build a fresh agent, then replace its random genotype with the
            # recombined one.
            child = NimAgent()
            child.genotype = child_genotype
            offspring.append(child)
    return offspring

def crossover(genotype1, genotype2):
    """One-point crossover of two weight vectors.

    A cut point is drawn uniformly from ``1 .. len(genotype1) - 1``; the first
    child takes the head of ``genotype1`` and the tail of ``genotype2``, the
    second child the opposite. Both children are rescaled to sum to 1.

    Args:
        genotype1, genotype2: sequences of weights (lists or arrays).

    Returns:
        A pair of lists holding the normalised child genotypes.

    Raises:
        ValueError: from ``random.randint`` when ``genotype1`` has fewer than
            two genes, since no cut point exists.
    """
    # Derive the cut range from the genotype itself, not from a global size.
    cut = random.randint(1, len(genotype1) - 1)

    # Convert slices to lists so "+" concatenates even when arrays are passed.
    raw_child1 = list(genotype1[:cut]) + list(genotype2[cut:])
    raw_child2 = list(genotype2[:cut]) + list(genotype1[cut:])

    # Normalise explicitly on arrays instead of relying on ``list /= ndarray``.
    child1 = np.asarray(raw_child1, dtype=float)
    child2 = np.asarray(raw_child2, dtype=float)
    child1 = child1 / child1.sum()
    child2 = child2 / child2.sum()

    return list(child1), list(child2)


In [951]:
def mutate(genotype, probability=None, max_step=0.15):
    """Return a possibly mutated, re-normalised copy of ``genotype``.

    One gene index is drawn at random; with the given probability a
    non-negative bump of at most ``max_step`` is added to that gene. The
    result is always rescaled to sum to 1. The input sequence is never
    modified.

    Args:
        genotype: sequence of weights.
        probability: chance of applying the bump; ``None`` (the default)
            means "use ``MUTATION_PROBABILITY``".
        max_step: upper bound of the bump added to the selected gene.

    Returns:
        A new list with the normalised weights.
    """
    if probability is None:
        probability = MUTATION_PROBABILITY

    # Work on a private copy so the caller's genotype is left untouched.
    mutated = np.array(genotype, dtype=float)
    i = random.choice(range(len(mutated)))
    if random.random() < probability:
        mutated[i] += abs(random.uniform(-max_step, max_step))
    mutated /= mutated.sum()  # Normalize again after mutation
    return list(mutated)

In [952]:
for g in range(GENERATIONS):
    # Score the whole population and rank it, best agent first.
    population = evaluate(population)
    population.sort(key=lambda agent: agent.score, reverse=True)

    # Survivor selection: keep the top agents and clear their scores so the
    # next generation starts from scratch.
    population = population[:POPULATION_SIZE-OFFSPRING_SIZE]
    for agent in population:
        agent.score = 0

    # Parents are the best OFFSPRING_SIZE survivors.
    offspring = list(population[:OFFSPRING_SIZE])

    # Recombination followed by mutation of every child.
    offspring = crossover_agents(offspring)
    for agent in offspring:
        agent.genotype = mutate(agent.genotype)

    # Next generation = survivors followed by their children.
    population = population + offspring

# Survivors stay in ranked order, so the first entry is the top-ranked agent
# of the last evaluation.
best_agent = population[0]
print(best_agent.genotype)


[0.16669969479606686, 0.0523319861610638, 0.4403431403918838, 0.17885076329007932, 0.16177441536090628]


In [962]:
logging.getLogger().setLevel(logging.INFO)

# Benchmark: the evolved agent (moving first) against the nim-sum based player.
best_agent_wins = 0
adversary_wins = 0

for i in range(50):
    nim = Nim(5)
    player = 0
    while nim:
        if player != 0:
            ply = optimal(nim)
        else:
            strategy = get_strategy(best_agent.genotype)
            ply = strategy(nim)
        nim.nimming(ply)
        player = 1 - player
    # The turn was handed over after the last take, so `player` is the side
    # that did not remove the last object and is counted as the winner.
    if player != 0:
        adversary_wins += 1
    else:
        best_agent_wins += 1
print("best agent wins: ", best_agent_wins)
print("optimal agent wins: ", adversary_wins)


best agent wins:  13
optimal agent wins:  37


In [965]:
logging.getLogger().setLevel(logging.INFO)

# Benchmark: the evolved agent (moving first) against the purely random player.
best_agent_wins = 0
adversary_wins = 0

for i in range(50):
    nim = Nim(5)
    player = 0
    while nim:
        if player != 0:
            ply = pure_random_mode(nim)
        else:
            strategy = get_strategy(best_agent.genotype)
            ply = strategy(nim)
        nim.nimming(ply)
        player = 1 - player
    # The turn was handed over after the last take, so `player` is the side
    # that did not remove the last object and is counted as the winner.
    if player != 0:
        adversary_wins += 1
    else:
        best_agent_wins += 1
print("best agent wins: ", best_agent_wins)
print("pure random agent wins: ", adversary_wins)

best agent wins:  28
pure random agent wins:  22


In [966]:
logging.getLogger().setLevel(logging.INFO)

# Benchmark: the evolved agent (moving first) against the "gabriele" player.
best_agent_wins = 0
adversary_wins = 0

for i in range(50):
    nim = Nim(5)
    player = 0
    while nim:
        if player != 0:
            ply = gabriele_mode(nim)
        else:
            strategy = get_strategy(best_agent.genotype)
            ply = strategy(nim)
        nim.nimming(ply)
        player = 1 - player
    # The turn was handed over after the last take, so `player` is the side
    # that did not remove the last object and is counted as the winner.
    if player != 0:
        adversary_wins += 1
    else:
        best_agent_wins += 1
print("best agent wins: ", best_agent_wins)
print("gabriele agent wins: ", adversary_wins)

best agent wins:  48
gabriele agent wins:  2


In [990]:
logging.getLogger().setLevel(logging.INFO)

# Benchmark: the evolved agent (moving first) against the conservative player.
best_agent_wins = 0
adversary_wins = 0

for i in range(50):
    nim = Nim(5)
    player = 0
    while nim:
        if player != 0:
            ply = conservative_mode(nim)
        else:
            strategy = get_strategy(best_agent.genotype)
            ply = strategy(nim)
        nim.nimming(ply)
        player = 1 - player
    # The turn was handed over after the last take, so `player` is the side
    # that did not remove the last object and is counted as the winner.
    if player != 0:
        adversary_wins += 1
    else:
        best_agent_wins += 1
print("best agent wins: ", best_agent_wins)
print("conservative agent wins: ", adversary_wins)

best agent wins:  27
conservative agent wins:  23
