# Task 3.3: Min-max agent
Player 0 = maximizing <br/>
Player 1 = minimizing

In [35]:
import logging
import random
from collections import namedtuple
from copy import deepcopy
from itertools import accumulate
from operator import xor
from typing import Callable, Optional

import numpy as np

In [36]:
# A move in Nim: remove `num_objects` objects from the row with index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])

In [37]:
class Nim:
    """Board for the game of Nim.

    Row ``i`` starts with ``2 * i + 1`` objects. ``k``, when it is not
    ``None``, is the largest number of objects one move may remove.
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        # Odd row sizes 1, 3, 5, ... -- one entry per row.
        self._rows = list(range(1, 2 * num_rows, 2))
        self._k = k

    def __bool__(self):
        """Return True while at least one object is left on the board."""
        return 0 < sum(self._rows)

    def __str__(self):
        """Render the row sizes as ``<a b c>``."""
        return f"<{' '.join(map(str, self._rows))}>"

    @property
    def rows(self) -> tuple:
        """Snapshot of the current row sizes as a tuple."""
        return tuple(self._rows)

    @property
    def k(self) -> int:
        """Per-move limit given at construction (``None`` when not set)."""
        return self._k

    def nimming(self, ply: Nimply) -> None:
        """Apply the move ``ply`` (row index, number of objects) in place.

        Asserts that the row holds enough objects and that the move does
        not exceed ``k`` when a limit is set.
        """
        row, num_objects = ply
        assert num_objects <= self._rows[row]
        assert self._k is None or num_objects <= self._k
        self._rows[row] -= num_objects

In [38]:
def all_possible_new_states(init_state: list) -> list:
    """Return every state reachable from ``init_state`` in one move.

    A move removes between one and all of the objects of a single row.
    The result is ordered by row index and, within a row, by increasing
    number of objects removed. ``init_state`` itself is never modified.

    Args:
        init_state: Number of objects in each row (any sequence of ints).

    Returns:
        A list of new, independent lists, one per possible move. It is
        empty when every row is empty.
    """
    possible_new_states = []
    for row, count in enumerate(init_state):
        # `remaining` runs count-1, ..., 0, i.e. 1, ..., count objects taken.
        for remaining in range(count - 1, -1, -1):
            # Rows are plain ints, so a shallow copy is sufficient and far
            # cheaper than a deep copy inside the recursive search.
            new_state = list(init_state)
            new_state[row] = remaining
            possible_new_states.append(new_state)
    return possible_new_states
            

In [39]:
# Sanity check: list every state reachable in one move from rows 1, 3, 5.
state = [1,3,5]
print("poss state: ", all_possible_new_states(state))


poss state:  [[0, 3, 5], [1, 2, 5], [1, 1, 5], [1, 0, 5], [1, 3, 4], [1, 3, 3], [1, 3, 2], [1, 3, 1], [1, 3, 0]]


In [40]:
def evaluate(state_as_list: list, player: int) -> Optional[int]:
    """Score the position if the game is over.

    ``player`` is the player whose turn it is (0 = maximizing,
    1 = minimizing). On an empty board that player has nothing left to
    take: the opponent removed the last object and has won.

    Args:
        state_as_list: Number of objects in each row.
        player: Index of the player to move.

    Returns:
        -1 if the board is empty and player 0 is to move (player 1 won),
        1 if the board is empty and player 1 is to move (player 0 won),
        None if objects remain and the game is not over yet.
    """
    if sum(state_as_list) != 0:
        return None
    return -1 if player == 0 else 1

In [41]:
def minimax(state_as_list: list, player: int) -> int:
    """Return the minimax value of a position by exhaustive search.

    Args:
        state_as_list: Number of objects in each row.
        player: Player to move; 0 maximizes the score, any other value
            minimizes it.

    Returns:
        The score reached when both sides pick their best move:
        1 is the best outcome for player 0, -1 the best for player 1.
    """
    score = evaluate(state_as_list, player)
    if score is not None:
        # The game is over; nothing left to search.
        return score

    # After this move it is the other side's turn.
    opponent = 1 if player == 0 else 0
    scores = [
        minimax(new_state, player=opponent)
        for new_state in all_possible_new_states(state_as_list)
    ]
    return max(scores) if player == 0 else min(scores)



In [42]:
def best_move(state_as_list: list, player: int) -> tuple:
    """Pick the best successor state for ``player`` using plain minimax.

    Every state reachable in one move is scored with ``minimax`` from the
    opponent's point of view. Player 0 takes the largest
    ``(score, new_state)`` tuple, any other player the smallest; because
    whole tuples are compared, ties on the score are decided by comparing
    the states themselves.

    Returns:
        The chosen ``(score, new_state)`` tuple.
    """
    maximizing = player == 0
    # The successor positions are evaluated with the other side to move.
    opponent = 1 if maximizing else 0
    candidates = (
        (minimax(successor, opponent), successor)
        for successor in all_possible_new_states(state_as_list)
    )
    choose = max if maximizing else min
    return choose(candidates)
    


In [43]:
# Try the plain minimax search: best move for the minimizing player (1).
test = [1,1,5]
t = best_move(test, 1)
print(t)

(-1, [1, 1, 0])


In [44]:
def minimax_pruning(state_as_list: list, is_maximizing: bool, alpha=-1, beta=1) -> int:
    """Return the minimax value of a position, using alpha-beta pruning.

    Args:
        state_as_list: Number of objects in each row.
        is_maximizing: True when the maximizing player (player 0) is to
            move, False when the minimizing player (player 1) is.
        alpha: Best score the maximizer has found so far on this path.
        beta: Best score the minimizer has found so far on this path.

    Returns:
        The best score found for the side to move: 1 favors player 0,
        -1 favors player 1.
    """
    # evaluate() expects a player index (0 = maximizing, 1 = minimizing).
    # Passing the bool straight through would invert the terminal score,
    # because True == 1 denotes the *minimizing* player there.
    player = 0 if is_maximizing else 1
    if (score := evaluate(state_as_list, player)) is not None:
        # The game is over; nothing left to search.
        return score

    scores = []
    for new_state in all_possible_new_states(state_as_list):
        score = minimax_pruning(new_state, not is_maximizing, alpha, beta)
        scores.append(score)

        if is_maximizing:
            alpha = max(alpha, score)
        else:
            beta = min(beta, score)

        if beta <= alpha:
            # The bounds have met: the remaining moves cannot improve on
            # what has already been found, so stop searching this node.
            break

    return (max if is_maximizing else min)(scores)



In [45]:
def best_move_pruning(state_as_list: list, player: int) -> tuple:
    """Pick the best successor state for ``player`` using alpha-beta search.

    Every state reachable in one move is scored with ``minimax_pruning``
    with the other side to move. Player 0 takes the largest
    ``(score, new_state)`` tuple, any other player the smallest; ties on
    the score are decided by comparing the states themselves.

    Returns:
        The chosen ``(score, new_state)`` tuple.
    """
    logging.debug(f"best_move_pruning, player = {player}")
    maximizing = player == 0
    pick = max if maximizing else min
    return pick(
        # Once the move is made, the opponent is the one to play.
        (minimax_pruning(successor, is_maximizing=not maximizing), successor)
        for successor in all_possible_new_states(state_as_list)
    )

In [46]:
# Try the alpha-beta search: best move for the maximizing player (0).
s = best_move_pruning([1,2,3], 0)
print(s)

(-1, [1, 2, 2])


In [47]:
def nimply_move(current_state: Nim, new_state: list) -> Nimply:
    """Work out which move turns ``current_state`` into ``new_state``.

    The rows are compared position by position. The returned ``Nimply``
    names the last row whose size differs and the difference
    (current size minus new size). If no row differs, the result is
    ``Nimply(len(rows), 0)``, whose row index is out of range.
    """
    before = current_state.rows
    # Start from the "nothing changed" sentinel: an out-of-range row index.
    changed_row, removed = len(before), 0

    for index, old_count in enumerate(before):
        new_count = new_state[index]
        if old_count != new_count:
            changed_row, removed = index, old_count - new_count

    return Nimply(changed_row, removed)

In [48]:
def min_max_agent(state: Nim):
    """Choose a move for the current board with the alpha-beta search.

    The board is searched with the agent in the role of player 0 (the
    maximizer), and the chosen successor state is converted back into a
    ``Nimply`` move.

    Returns:
        The ``Nimply`` produced by ``nimply_move`` for the chosen state.
    """
    # The recursive search works on a plain list of row sizes.
    state_as_list = list(state.rows)
    logging.debug(f"State as a list: {state_as_list}")

    # best_move_pruning yields (score, new_state); only the state is needed.
    _, target_state = best_move_pruning(state_as_list, player=0)
    logging.debug(f"MOVE: {target_state}")

    return nimply_move(state, target_state)

In [49]:
# Smoke test: ask the minimax agent for its move on a fresh 3-row board.
state = [1,1,1,5] # only used by the commented-out experiments below
logging.getLogger().setLevel(logging.DEBUG) # show the debug messages

player = 1 # only used by the commented-out experiments below

s = Nim(3)
res = min_max_agent(s)
print("RES:", res)

# Earlier experiments calling the search functions directly (disabled).
#print("state: ", state)

#scores = best_move(state, player)
#print(scores)

#sc = best_move_pruning(state, player)
#print(f"sc {sc}")
#"""
#"""

DEBUG:root:State as a list: [1, 3, 5]
DEBUG:root:MOVE: [1, 3, 2]


RES: Nimply(row=2, num_objects=3)


In [50]:
def play_nim(agent1: Callable, opponent: Callable, num_rows: int = 4):
    """Play one game of Nim between two strategies and log the outcome.

    Args:
        agent1: Strategy for player 0, who moves first. It is called with
            the current ``Nim`` board and must return the move to apply.
        opponent: Strategy for player 1, same calling convention.
        num_rows: Number of rows on the starting board (default 4).

    The player who makes the last move is logged as the winner.
    """
    nim = Nim(num_rows)
    logging.debug(f"status: Initial board  -> {nim}")

    strategy = (agent1, opponent)
    player = 0

    while nim:
        ply = strategy[player](nim)
        nim.nimming(ply)
        logging.debug(f"status: After player {player} -> {nim}")
        player = 1 - player
    # `player` was switched after the final move, so switch it back to
    # get the player who emptied the board.
    winner = 1 - player
    logging.info(f"status: Player {winner} won!")

# Self-play: the minimax agent controls both players.
# NOTE(review): the recorded run below was interrupted (KeyboardInterrupt)
# after the first debug messages, before any move was logged.
play_nim(min_max_agent, min_max_agent)

DEBUG:root:status: Initial board  -> <1 3 5 7>
DEBUG:root:State as a list: [1, 3, 5, 7]


KeyboardInterrupt: 