# Lab 3.3 MinMax


## Task

Write agents able to play [*Nim*](https://en.wikipedia.org/wiki/Nim), with an arbitrary number of rows and an upper bound $k$ on the number of objects that can be removed in a turn (a.k.a., *subtraction game*).

The player **taking the last object wins**.

* Task3.3: An agent using minmax
* Task3.4: An agent using reinforcement learning

## Instructions

* Create the directory `lab3` inside the course repo 
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed)

## Notes

* Working in group is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

## Deadlines ([AoE](https://en.wikipedia.org/wiki/Anywhere_on_Earth))

* Sunday, December 4th for Task3.1 and Task3.2
* Sunday, December 11th for Task3.3 and Task3.4
* Sunday, December 18th for all reviews

## Util functions from previous task

Added function:
* IsSafe: return true if a state is safe in a nim-sum based strategy 
* MoveFromOtherNim: calculate the difference between two nim state and return the move as Nimply tuple
* fromString: change nim state from actual to a state given by a string
* lastMove: True if last move is avaiable and the agent could win the game

In [65]:
import logging
from collections import namedtuple
import random
from typing import Callable
from copy import deepcopy
from itertools import accumulate
from operator import xor
from functools import reduce
import numpy as np

Nimply = namedtuple("Nimply", "row, num_objects")

class Nim:
    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"
    
    def __eq__(self, __o: object) -> bool:
        if self._rows==__o._rows and self._k==__o._k:
            return True
        else:
            return False

    @property
    def rows(self) -> tuple:
        return tuple(self._rows)

    @property
    def k(self) -> int:
        return self._k

    def nimming(self, ply: Nimply) -> None:
        row, num_objects = ply
        assert self._rows[row] >= num_objects
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

    def isSafe(self) -> bool:
        
        if(self.__bool__()==False):
            return False
        else:
            return safeness(self)==0
    
    def moveFromOtherNim(self,__o: object)->Nimply:
        for i in range(len(self._rows)):
            if(self._rows[i]==__o._rows[i]):
                continue
            else:
                return Nimply(i,self._rows[i]-__o._rows[i])
                
        raise Exception("Problem with moves with other nim")
    
    def fromString(self,S: str)->None:

        string=S.replace("<","")
        string=string.replace(">","")
        list_s=string.split(" ")

        lista_row=list()
        for i in list_s:
            lista_row.append(int(i))
        self._rows=lista_row
    
    def last_move(self)->bool:
        c=0
        for i in self._rows:
            if i>0:
                c+=1
        if c==1:
            return True
        else:
            return False

        



def nim_sum(state: Nim) -> int:
    *_, result = accumulate(state.rows, xor)
    return result


def cook_status(state: Nim) -> dict:
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(state.rows) for o in range(1, c + 1) if state.k is None or o <= state.k
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in state.rows)
    
    if(cooked["active_rows_number"]>0):
    
        cooked["valide_row"]= [ r for r,c in enumerate(state.rows) if state._rows[r]>0]
        cooked["shortest_row"] = min((x for x in enumerate(state.rows) if x[1] > 0), key=lambda y: y[1])[0]
        cooked["longest_row"] = max((x for x in enumerate(state.rows)), key=lambda y: y[1])[0]
        cooked["nim_sum"] = nim_sum(state)
        
        next_childs=list()
        for m in cooked["possible_moves"]:
            tmp = deepcopy(state)
            tmp.nimming(m)
            next_childs.append(tmp)
        cooked['next_nodes']=next_childs
        
        brute_force = list()
        for m in cooked["possible_moves"]:
            tmp = deepcopy(state)
            tmp.nimming(m)
            brute_force.append((m, nim_sum(tmp)))
        cooked["brute_force"] = brute_force
    
    else:
        cooked['next_nodes']=None

    return cooked


def safeness(state: Nim):
    return reduce((lambda x,y: x ^ y), state._rows)

def comeback(state: Nim):  
    actual_cost = 10000
    *_, XOR = accumulate(state.rows, xor)
    r_cost = 0
    for _, r in enumerate(state._rows):
        #print(r)
        new_cost = abs((XOR ^ r) - r)
        # search for a minimum cost from the possible moves
        if (actual_cost > new_cost):
            r_cost = ((XOR ^ r) - r)
            if r_cost < 0: 
                actual_cost = (XOR ^ r) - r
            idx = r
    # returns the r_cost found that is the minimum for the possible moves
    return state._rows.index(idx),-r_cost

def expert_system_wikipedia(state: Nim):
    # An agent using fixed rules based on *nim-sum* by the rules of safeness described on wikipedia
    # https://it.wikipedia.org/wiki/Nim
    
    safe = reduce((lambda x,y: x ^ y), state._rows) == 0 # a safe state 
    
    if all([n <= 1 for n in state._rows]) or safe:  # if the rows have only 1s and 0s value, there is no strategy, we proceed randomly
        return pure_random(state)
    else: #not a safe state -> look for safety, the player looks to leave the board in a safe state
        return comeback(state)

def pure_random(state: Nim) -> Nimply:
    '''simple strategy, it just choose randomly from the rows'''
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    num_objects = random.randint(1, state.rows[row])
    return Nimply(row, num_objects)

def optimal_strategy(state: Nim) -> Nimply:
    '''optimal_startegy is based on brute force approach ''' 
    data = cook_status(state)
    # with next we give back the moves once the object to remove has been removed
    return next((bf for bf in data["brute_force"] if bf[1] == 0), random.choice(data["brute_force"]))[0]

def evolve(genome: dict) -> Callable:
    def evolvable(state: Nim) -> Nimply:
        '''evolvable strategy consists in a choice between the shortest row or he longest one'''
        data = cook_status(state)

        if random.random() < genome["p"]:
            ply = Nimply(data["shortest_row"], random.randint(1, state.rows[data["shortest_row"]]))
        else:
            ply = Nimply(data["longest_row"], random.randint(1, state.rows[data["longest_row"]]))

        return ply

    return evolvable

# Task3.3: An agent using minmax

It is possible to select the depth of the tree.<br/>
Deeper trees lead to better results but at very high processing times.

In [66]:
from anytree import Node, RenderTree, DoubleStyle,LevelOrderGroupIter

DEPTH=4
N=0

def make_tree(nim: Nim) -> Node:
    #root=Node(nim)
    root=Node((nim.__str__(),0,0,nim.isSafe()))
    res=recursive_add(root,cook_status(nim)['next_nodes'],1)
    root.name=((nim.__str__(),res,0,nim.isSafe()))
    return root

def recursive_add(node: Node, children: list,depth: int):
    
    count=0
    for i in children:
        
        if(i.isSafe()):
            n=1
        else:
            n=0
        #LEAF
        res=0
        
        if(cook_status(i)['next_nodes']==None):
            my_N=Node((i.__str__(),0,depth,i.isSafe()),parent=node)
            return 0
        else:
            my_N=Node((i.__str__(),n,depth,i.isSafe()),parent=node)
            if depth<DEPTH:
                res=recursive_add(my_N,cook_status(i)['next_nodes'],depth+1)
                my_N.name=(i.__str__(),res,depth,i.isSafe())
                count+=res
            count+=n
            
    return count



## Agent

In [67]:

tree=None

def minmax_agent(state: Nim) -> Nimply:
    
    global tree
    nodes=[[node for node in children] for children in LevelOrderGroupIter(tree,maxlevel=2)]
    
    #CHECK IF TREE IS UP TO DATE

    root=nodes[0][0]
    root_name=root.name[0]
    nim_root=Nim(0)
    nim_root.fromString(root_name)
    if(state.__eq__(nim_root)):
        pass
    else:
        for i in nodes[1]:
            F=Nim(0)
            F.fromString(i.name[0])
            if(state.__eq__(F)):
                i.parent=None
                tree=i
                break
    

    #CHECK BEST MOVE
    nodes=[[node for node in children] for children in LevelOrderGroupIter(tree,maxlevel=2)]
    
    #Final-move check
    
    root=nodes[0][0]
    root_name=root.name[0]
    nim_root=Nim(0)
    nim_root.fromString(root_name)
    if(nim_root.last_move()):
        for i,j in enumerate(nim_root.rows):
            if j>0:
                return Nimply(i,j)
    

    lower=np.inf
    lowerNode=None

    for i in nodes[1]:
        if(i.name[1]<lower):
            lowerNode=i
            lower=i.name[1]

    nim_temp=Nim(0)
    
    nim_temp.fromString(lowerNode.name[0])
    
    move=state.moveFromOtherNim(nim_temp)
    
    #update tree

    tree=make_tree(nim_temp)
    
    '''
    print("tree2=")
    print(RenderTree(tree, style=DoubleStyle))
    print("\n\n")
    print("---------------------------------------------")
    '''
    return move

## Evaluation

### Against Pure random

In [70]:
logging.getLogger().setLevel(logging.DEBUG)

strategy = (minmax_agent, pure_random)
N=5
nim = Nim(N)
logging.debug(f"status: Initial board  -> {nim}")
player = 0
DEPTH=5
tree=make_tree(nim)
while nim:  #while nim is > 0, i.e. we have at least an object to remove
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    
    player = 1 - player
    
winner = 1 - player

logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9>
DEBUG:root:status: After player 0 -> <1 3 5 7 0>
DEBUG:root:status: After player 1 -> <1 1 5 7 0>
DEBUG:root:status: After player 0 -> <1 1 5 0 0>
DEBUG:root:status: After player 1 -> <1 0 5 0 0>
DEBUG:root:status: After player 0 -> <0 0 5 0 0>
DEBUG:root:status: After player 1 -> <0 0 1 0 0>
DEBUG:root:status: After player 0 -> <0 0 0 0 0>
INFO:root:status: Player 0 won!


Lastmove


### Against Professor's strategy

In [68]:
logging.getLogger().setLevel(logging.DEBUG)

strategy = (minmax_agent, evolve({'p':0.5}))
N=5
nim = Nim(N)
logging.debug(f"status: Initial board  -> {nim}")
player = 0
DEPTH=5
tree=make_tree(nim)
while nim:  #while nim is > 0, i.e. we have at least an object to remove
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    
    player = 1 - player
    
winner = 1 - player

logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9>
DEBUG:root:status: After player 0 -> <1 3 5 7 0>
DEBUG:root:status: After player 1 -> <0 3 5 7 0>
DEBUG:root:status: After player 0 -> <0 3 5 0 0>
DEBUG:root:status: After player 1 -> <0 0 5 0 0>
DEBUG:root:status: After player 0 -> <0 0 0 0 0>
INFO:root:status: Player 0 won!


Lastmove


### Against nim-sum agent

In [69]:
logging.getLogger().setLevel(logging.DEBUG)

strategy = (minmax_agent, expert_system_wikipedia)
N=5
nim = Nim(N)
logging.debug(f"status: Initial board  -> {nim}")
player = 0
DEPTH=5
tree=make_tree(nim)
while nim:  #while nim is > 0, i.e. we have at least an object to remove
    ply = strategy[player](nim)
    nim.nimming(ply)
    logging.debug(f"status: After player {player} -> {nim}")
    
    player = 1 - player
    
winner = 1 - player

logging.info(f"status: Player {winner} won!")

DEBUG:root:status: Initial board  -> <1 3 5 7 9>
DEBUG:root:status: After player 0 -> <1 3 5 7 0>
DEBUG:root:status: After player 1 -> <0 3 5 7 0>
DEBUG:root:status: After player 0 -> <0 3 5 0 0>
DEBUG:root:status: After player 1 -> <0 3 3 0 0>
DEBUG:root:status: After player 0 -> <0 0 3 0 0>
DEBUG:root:status: After player 1 -> <0 0 0 0 0>
INFO:root:status: Player 1 won!
