Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 3: Policy Search

## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.1: An agent using fixed rules based on *nim-sum* (i.e., an *expert system*)
* Task3.2: An agent using evolved rules
* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) inside it

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

## Deadlines ([AoE](https://en.wikipedia.org/wiki/Anywhere_on_Earth))

* Sunday, December 4th for Task3.1 and Task3.2
* Sunday, December 11th for Task3.3 and Task3.4
* Sunday, December 18th for all reviews

In [1]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor
import random
import math
import numpy as np
import sys

## The *Nim* and *Nimply* classes

In [2]:
# A single move: the index of the row to take from and how many objects to remove.
Nimply = namedtuple("Nimply", "row, num_objects")

In [3]:
class Nim:
    """Board of a Nim game.

    Row ``i`` starts with ``2 * i + 1`` objects. When ``k`` is not ``None`` it
    is the maximum number of objects that a single move may remove.
    """

    def __init__(self, num_rows: int, k: "int | None" = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        # The board is truthy while at least one object is left, so
        # ``while nim:`` loops until the game is over.
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Read-only snapshot of the number of objects in each row."""
        return tuple(self._rows)

    @property
    def k(self) -> "int | None":
        """Per-move removal cap, or ``None`` when moves are uncapped."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply ``ply`` (a ``(row, num_objects)`` pair) to the board in place.

        Raises:
            AssertionError: if the move removes fewer than one object, more
                objects than the row holds, or more than ``k`` objects.
        """
        row, num_objects = ply
        # A move must remove something: zero would be a no-op (the game would
        # never progress) and a negative amount would add objects to the row.
        assert num_objects >= 1
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

## Task 3.1: hardcoded strategies

In [4]:
def pure_random(state: Nim) -> Nimply:
    """Pick a random non-empty row and remove a random legal number of objects from it."""
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Honour the per-move cap; without it the move could exceed ``k`` and be
    # rejected by ``Nim.nimming``.
    limit = state.rows[row] if state.k is None else min(state.rows[row], state.k)
    num_objects = random.randint(1, limit)
    return Nimply(row, num_objects)

In [5]:
def gabriele(state: Nim) -> Nimply:
    """Pick always the maximum possible number of the lowest row.

    "Lowest" means the non-empty row with the smallest index; the amount taken
    is the whole row, capped by ``state.k`` when a cap is set.

    Raises:
        ValueError: if every row is empty (there is no legal move).
    """
    for row, count in enumerate(state.rows):
        if count > 0:
            take = count if state.k is None else min(count, state.k)
            return Nimply(row, take)
    raise ValueError("no objects left to take")

In [6]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum of the board: the bitwise XOR of all row sizes.

    A board without rows has nim-sum 0.
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result


def cook_status(state: Nim) -> dict:
    """Collect derived facts about ``state`` for use by the strategies.

    Returns a dict with the keys:
        possible_moves: every legal ``(row, num_objects)`` pair, honouring ``state.k``.
        active_rows_number: how many rows still hold objects.
        shortest_row: index of the non-empty row with the fewest objects.
        longest_row: index of the row with the most objects.
        nim_sum: nim-sum of the current board.
        brute_force: list of ``(move, nim_sum_after_move)`` for every legal move.

    Raises:
        ValueError: if no row holds any object (``shortest_row`` is undefined).
    """
    rows = state.rows
    cap = state.k
    current = nim_sum(state)

    possible_moves = [
        (r, o)
        for r, c in enumerate(rows)
        for o in range(1, (c if cap is None else min(c, cap)) + 1)
    ]

    cooked = dict()
    cooked["possible_moves"] = possible_moves
    cooked["active_rows_number"] = sum(c > 0 for c in rows)
    cooked["shortest_row"] = min((x for x in enumerate(rows) if x[1] > 0), key=lambda y: y[1])[0]
    cooked["longest_row"] = max(enumerate(rows), key=lambda y: y[1])[0]
    cooked["nim_sum"] = current
    # XOR is its own inverse, so the nim-sum after shrinking row r from c to
    # c - o is ``current ^ c ^ (c - o)``; no need to copy the board per move.
    cooked["brute_force"] = [((r, o), current ^ rows[r] ^ (rows[r] - o)) for r, o in possible_moves]

    return cooked

In [7]:
def optimal_startegy(state: Nim) -> Nimply:
    """Play a move that leaves nim-sum 0 when one exists, otherwise a random legal move."""
    data = cook_status(state)
    move = next((ply for ply, outcome in data["brute_force"] if outcome == 0), None)
    if move is None:
        # Only draw a random move when it is actually needed.
        move = random.choice(data["brute_force"])[0]
    # Wrap the plain tuple so the result matches the declared return type.
    return Nimply(*move)

In [8]:
def make_strategy(genome: dict) -> Callable:
    """Build a strategy parameterised by ``genome["p"]``.

    With probability ``genome["p"]`` the returned strategy takes from the
    shortest non-empty row, otherwise from the longest row; the number of
    objects removed is uniform over the legal amounts for that row.
    """

    def evolvable(state: Nim) -> Nimply:
        data = cook_status(state)

        target = "shortest_row" if random.random() < genome["p"] else "longest_row"
        row = data[target]
        # Honour the per-move cap so the move is always accepted by ``Nim.nimming``.
        limit = state.rows[row] if state.k is None else min(state.rows[row], state.k)
        return Nimply(row, random.randint(1, limit))

    return evolvable

In [38]:
def big_row_strategy(state: Nim) -> Nimply:
    """Parity heuristic for boards with exactly one row larger than 1.

    When exactly one row holds more than one object, return the first legal
    move (rows in index order, amounts ascending) that leaves an even total
    number of objects on the board. In every other situation, or when no
    such move exists, defer to ``optimal_startegy``.
    """
    rows = state.rows

    if sum(count > 1 for count in rows) == 1:
        total = sum(rows)
        for row, count in enumerate(rows):
            limit = count if state.k is None else min(count, state.k)
            for num_objects in range(1, limit + 1):
                # Removing num_objects lowers the total by exactly that much,
                # so the board does not need to be copied to test the parity.
                # NOTE(review): the first even-total move is not necessarily a
                # nim-sum-zero move (e.g. rows (1, 5) yields (1, 3)); confirm
                # this is the intended heuristic.
                if (total - num_objects) % 2 == 0:
                    return Nimply(row, num_objects)

    # Fall back instead of implicitly returning None when no parity move exists.
    return optimal_startegy(state)



## Strategy evaluator

In [41]:
def evaluate(strategy_player_0: Callable, strategy_player_1: Callable, N_MATCHES, NIM_SIZE) -> float:
    """Return the fraction of ``N_MATCHES`` games won by ``strategy_player_0``.

    Every game is played on a fresh ``Nim(NIM_SIZE)`` board, player 0 always
    moves first, and the player who makes the last move wins.
    """
    strategies = (strategy_player_0, strategy_player_1)
    victories = 0
    for _ in range(N_MATCHES):
        board = Nim(NIM_SIZE)
        turn = 0
        while sum(board.rows) > 0:
            board.nimming(strategies[turn](board))
            turn = 1 - turn
        # ``turn`` was flipped after the final move, so player 0 made the
        # last move exactly when ``turn`` is now 1.
        if turn == 1:
            victories += 1
    return victories / N_MATCHES

In [42]:
# Benchmark settings: number of games to play and number of rows on the board.
N_MATCHES = 100
NIM_SIZE = 11
# NOTE(review): `start` is not referenced anywhere in the visible code; confirm it is still needed.
start = 1
# Fraction of games won by big_row_strategy (player 0, moving first) against optimal_startegy.
evaluate(big_row_strategy,optimal_startegy, N_MATCHES, NIM_SIZE)

0.48

## Simple match between two players using different hardcoded strategies

In [106]:
# Let DEBUG messages from the root logger through so every move is shown.
logging.getLogger().setLevel(logging.DEBUG)

# Index 0 plays big_row_strategy, index 1 plays optimal_startegy.
players_strategies = (big_row_strategy, optimal_startegy)

nim = Nim(11)
logging.debug(f"status: Initial board  -> {nim}")
# Player 1 (optimal_startegy) makes the first move in this match.
player = 1
# A Nim board is truthy while objects remain, so this loops until the game ends.
while nim:
    ply = players_strategies[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
# `player` was flipped after the final move, so the last mover (the winner) is the other one.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:status: After player 1 -> <1 3 5 7 9 11 13 15 6 19 21>
DEBUG:root:status: After player 0 -> <1 3 5 7 2 11 13 15 6 19 21>
DEBUG:root:status: After player 1 -> <1 3 5 7 2 0 13 15 6 19 21>
DEBUG:root:status: After player 0 -> <1 3 5 7 2 0 13 15 6 19 20>
DEBUG:root:status: After player 1 -> <0 3 5 7 2 0 13 15 6 19 20>
DEBUG:root:status: After player 0 -> <0 3 5 7 2 0 13 3 6 19 20>
DEBUG:root:status: After player 1 -> <0 3 5 7 2 0 1 3 6 19 20>
DEBUG:root:status: After player 0 -> <0 3 5 7 2 0 1 3 6 13 20>
DEBUG:root:status: After player 1 -> <0 3 5 7 2 0 1 3 6 13 10>
DEBUG:root:status: After player 0 -> <0 3 5 7 2 0 1 3 6 13 7>
DEBUG:root:status: After player 1 -> <0 3 5 7 2 0 1 3 6 0 7>
DEBUG:root:status: After player 0 -> <0 3 5 7 2 0 1 0 6 0 7>
DEBUG:root:status: After player 1 -> <0 0 5 7 2 0 1 0 6 0 7>
DEBUG:root:status: After player 0 -> <0 0 5 7 0 0 1 0 6 0 7>
DEBUG:root:status: After player 1 -> <0 0 5 5 0

## Task 3.2

In [529]:
def make_strategy_g(genome: np.array) -> Callable:
    """Build a strategy driven by a per-row ``genome`` of removal amounts.

    The returned strategy ignores genes that are 0 or that belong to empty
    rows. If the smallest remaining gene exceeds the smallest non-empty row,
    it clears one of the smallest rows (chosen at random). Otherwise it
    removes that smallest gene's amount from the first row that carries this
    gene value and holds enough objects.
    """

    def evolvable(state: Nim) -> Nimply:
        counts = np.array(state.rows)
        sentinel = 2**63-1  # stands in for "gene not usable"

        # Mask out genes of empty rows first, then genes that are zero.
        genes = genome.copy()
        genes[np.flatnonzero(counts == 0)] = sentinel
        genes[np.flatnonzero(genes == 0)] = sentinel

        smallest_gene = min(genes)
        smallest_count = min(counts[np.flatnonzero(counts > 0)])
        # Drawn unconditionally, even when the second branch ends up being used.
        smallest_row = random.choice(np.flatnonzero(counts == smallest_count))

        if smallest_gene > smallest_count and smallest_count:
            return Nimply(smallest_row, int(smallest_count))

        # Look the gene value up in the unmasked genome; rows that cannot
        # afford the amount are skipped.
        for candidate in np.flatnonzero(genome == smallest_gene):
            if smallest_gene <= state.rows[candidate]:
                chosen_row = candidate
                break
        return Nimply(chosen_row, int(smallest_gene))

    return evolvable

Definition of the `Individual` class

In [530]:
class Individual:
    """A candidate genome together with its measured fitness.

    The fitness is the win rate of the genome's strategy, playing first,
    over 100 games against ``optimal_startegy`` on a ``NIM_SIZE``-row board.
    It is evaluated once, at construction time.
    """

    def __init__(self, genome: list):
        genes = np.array(genome)
        self.genome = genes
        player = make_strategy_g(genes)
        self.fitness = evaluate(player, optimal_startegy, 100, NIM_SIZE)

Hyper parameters

In [531]:
# Number of individuals created initially and kept after each generation.
POPULATION_SIZE = 10
# One gene per board row. NOTE(review): GENOME_SIZE is not referenced in the visible code.
GENOME_SIZE = NIM_SIZE = 11
# Number of iterations of the GA loop.
NUM_GENERATIONS = 10
# Number of crossover attempts made in each generation.
OFFSPRING_SIZE = 15
# Probability that a freshly crossed-over genome is also mutated.
MUTATION_RATE = 0.35


In [532]:
# Reference board: its row sizes bound the gene values drawn for the initial
# population and are read by `mutation` through this module-level name.
nim = Nim(NIM_SIZE)

Generate population

In [533]:
# Each gene is drawn uniformly from 1..(size of the matching row of `nim`).
population = [
    Individual(genome=[random.choice(range(1, nim.rows[row] + 1)) for row in range(NIM_SIZE)])
    for _ in range(POPULATION_SIZE)
]

Define GA operators

In [534]:
def tournament(population, tournament_size=2):
    """Draw ``tournament_size`` individuals (with replacement) and return the fittest."""
    contenders = random.choices(population, k=tournament_size)
    return max(contenders, key=lambda contender: contender.fitness)


def crossover(g1, g2):
    """One-point crossover: a prefix of ``g1`` followed by the matching suffix of ``g2``.

    The cut position is uniform over ``0..len(g1)``, so the child may be a
    full copy of either parent. The result is returned as a list.
    """
    cut = random.randrange(len(g1) + 1)
    head, tail = g1[:cut], g2[cut:]
    return list(np.concatenate((head, tail)))


def mutation(g):
    """Return a copy of genome ``g`` with one randomly chosen gene replaced.

    For the chosen position, a value is drawn uniformly from
    ``g[point]..nim.rows[point]`` and the old gene is subtracted from it, so
    the new gene lies in ``0..nim.rows[point] - g[point]``. The row sizes
    come from the module-level ``nim`` board.

    The genome is rebuilt with plain list operations: concatenating through
    numpy with an empty leading or trailing slice turned the whole genome
    into floats, which a later ``range(g[point], ...)`` cannot accept.
    """
    point = random.randint(0, len(g) - 1)
    nim_row = nim.rows[point]
    current = int(g[point])
    # NOTE(review): a new gene of 0 is possible here; make_strategy_g treats
    # zero genes as unusable. Confirm that this is intended.
    new_gene = random.choice(range(current, nim_row + 1)) - current
    mutated = [int(gene) for gene in g]
    mutated[point] = new_gene
    return mutated

GA algorithm

In [535]:
# Steady-state GA: every generation breeds up to OFFSPRING_SIZE new
# individuals, merges them into the population and keeps the fittest
# POPULATION_SIZE of them.
for generation in range(NUM_GENERATIONS):
    offspring = list()
    # Genomes already in the population, used to skip exact duplicates.
    genomes = [list(member.genome) for member in population]
    for i in range(OFFSPRING_SIZE):
        p1 = tournament(population)
        p2 = tournament(population)
        genome = crossover(p1.genome, p2.genome)
        if random.random() < MUTATION_RATE:
            genome = mutation(genome)
        if genome not in genomes:
            individual = Individual(genome=genome)
            offspring.append(individual)
    population += offspring
    # Fitness is a win rate (higher is better), so sort best-first before
    # truncating; an ascending sort would keep the worst individuals.
    population = sorted(population, key=lambda i: i.fitness, reverse=True)[:POPULATION_SIZE]
    # Best fitness of the surviving population.
    print(population[0].fitness)

14 18
14 18
2 4
7 10
8 14
1.0
6 8
2 4
1 16
10 12
14 18
1 2
5 22
1.0
4 6
13 16
2 22
4 10
1 16
9 16
1 2
1 2
1 2
1.0
3 22
7 10
3 6
1 4
1.0
1 2
6 10
18 20
9 12
1.0
1 2
5 14
5 20
1 2
2 14
3 8
7 8
6 10
1.0
1 6
1 2
5 22
1 2
4 16
1.0
1 4
9 10
8 10
3 18
1.0
7 14
8 18
4 6
2 8
5 20
1.0
1 4
5 22
8 10
14 16
1 6
4 16
9 12
1.0


In [536]:
# Let DEBUG messages from the root logger through so every move is shown.
logging.getLogger().setLevel(logging.DEBUG)

# Index 0 plays the strategy built from the genome at the head of the
# population list, index 1 plays optimal_startegy.
# NOTE(review): whether population[0] is the fittest individual depends on the
# sort direction used by the GA loop; verify.
players_strategies = (make_strategy_g(population[0].genome), optimal_startegy)

nim = Nim(NIM_SIZE)
logging.debug(f"status: Initial board  -> {nim}")
# Player 0 (the evolved strategy) makes the first move in this match.
player = 0
# A Nim board is truthy while objects remain, so this loops until the game ends.
while nim:
    ply = players_strategies[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    player = 1 - player
# `player` was flipped after the final move, so the last mover (the winner) is the other one.
winner = 1 - player
logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:status: After player 0 -> <0 3 5 7 9 11 13 15 17 19 21>
DEBUG:root:status: After player 1 -> <0 3 5 7 9 11 13 15 7 19 21>
DEBUG:root:status: After player 0 -> <0 2 5 7 9 11 13 15 7 19 21>
DEBUG:root:status: After player 1 -> <0 2 4 7 9 11 13 15 7 19 21>
DEBUG:root:status: After player 0 -> <0 1 4 7 9 11 13 15 7 19 21>
DEBUG:root:status: After player 1 -> <0 1 4 4 9 11 13 15 7 19 21>
DEBUG:root:status: After player 0 -> <0 0 4 4 9 11 13 15 7 19 21>
DEBUG:root:status: After player 1 -> <0 0 4 4 8 11 13 15 7 19 21>
DEBUG:root:status: After player 0 -> <0 0 3 4 8 11 13 15 7 19 21>
DEBUG:root:status: After player 1 -> <0 0 3 3 8 11 13 15 7 19 21>
DEBUG:root:status: After player 0 -> <0 0 2 3 8 11 13 15 7 19 21>
DEBUG:root:status: After player 1 -> <0 0 2 2 8 11 13 15 7 19 21>
DEBUG:root:status: After player 0 -> <0 0 1 2 8 11 13 15 7 19 21>
DEBUG:root:status: After player 1 -> <0 0 1 1 8 11 13 15 7 19 21>
DEBUG:ro