In [34]:
from collections import namedtuple
import random
from copy import deepcopy
from itertools import accumulate
from operator import xor

## The *Nim* and *Nimply* classes

In [35]:
# A single move ("ply"): take `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", "row, num_objects")

In [36]:
class Nim:
    """State of a Nim game: a list of rows, each holding a number of objects.

    Row ``i`` (0-based) starts with ``2 * i + 1`` objects, so ``Nim(3)`` starts
    as ``<1 3 5>``. An instance is truthy while at least one object remains.
    """

    def __init__(self, num_rows: int) -> None:
        """Create a board with ``num_rows`` rows holding 1, 3, 5, ... objects."""
        self._rows = [i * 2 + 1 for i in range(num_rows)]

    def __bool__(self):
        """Return True while there are still objects on the board."""
        return sum(self._rows) > 0

    def __str__(self):
        """Render the board as ``<r0 r1 ...>``."""
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the number of objects in each row."""
        return tuple(self._rows)

    def nimming(self, ply: Nimply) -> None:
        """Remove ``num_objects`` objects from row ``row`` (both taken from ``ply``).

        Raises:
            AssertionError: if the ply takes fewer than one object or more
                objects than the row currently holds.
        """
        row, num_objects = ply
        # A legal move takes at least one object: without the lower bound a
        # zero ply would be a free "pass" and a negative ply would add objects.
        assert 1 <= num_objects <= self._rows[row]
        self._rows[row] -= num_objects

    def adding(self, ply: Nimply) -> None:
        """Add ``num_objects`` objects to row ``row`` (both taken from ``ply``).

        No validation is performed on the amount added.
        """
        row, num_objects = ply
        self._rows[row] += num_objects
   

In [37]:
def nim_sum(state: Nim) -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all row sizes.

    A board without rows has nim-sum 0 (the XOR identity) instead of raising.
    """
    result = 0
    for count in state.rows:
        result ^= count
    return result


def cook_status(state: Nim) -> dict:
    """Collect summary statistics about the position ``state``.

    Returns a dict with the keys:
        "possible_moves": every legal ``(row, num_objects)`` pair.
        "active_rows_number": how many rows still hold at least one object.
        "shortest_row": index of the non-empty row with the fewest objects,
            or ``None`` when every row is empty.
        "longest_row": index of the row with the most objects.
        "nim_sum": nim-sum of the current position.
        "brute_force": list of ``(move, nim_sum_after_move)`` pairs, one per
            legal move, in the same order as "possible_moves".
    """
    rows = state.rows
    cooked = dict()
    cooked["possible_moves"] = [
        (r, o) for r, c in enumerate(rows) for o in range(1, c + 1)
    ]
    cooked["active_rows_number"] = sum(o > 0 for o in rows)

    # min() raises ValueError on an empty sequence, which used to make this
    # function crash on a finished board; report None in that case instead.
    active_rows = [(r, c) for r, c in enumerate(rows) if c > 0]
    cooked["shortest_row"] = (
        min(active_rows, key=lambda y: y[1])[0] if active_rows else None
    )
    cooked["longest_row"] = max(enumerate(rows), key=lambda y: y[1])[0]
    cooked["nim_sum"] = nim_sum(state)

    # The nim-sum is the XOR of all rows, so shrinking row r from c to c - o
    # changes it to total ^ c ^ (c - o); no need to copy the board per move.
    total = cooked["nim_sum"]
    cooked["brute_force"] = [
        ((r, o), total ^ rows[r] ^ (rows[r] - o))
        for r, o in cooked["possible_moves"]
    ]

    return cooked

# Lightweight variant of cook_status() for the minmax search: it computes only
# the possible moves, skipping the other statistics to reduce the cost per node.
def cook_status_for_mM(state: Nim) -> dict:
    """Return ``{"possible_moves": [...]}`` for ``state``.

    The list holds every ``(row, num_objects)`` pair with
    ``1 <= num_objects <= state.rows[row]``, ordered by row and then by the
    number of objects taken.
    """
    moves = []
    for row_index, count in enumerate(state.rows):
        for taken in range(1, count + 1):
            moves.append((row_index, taken))
    return {"possible_moves": moves}


### Some constants

In [38]:
# Number of rows of the board built for each evaluation match (Nim(NIM_SIZE)).
NIM_SIZE = 3
# Number of matches played in the evaluation loop.
NUM_MATCHES = 100

### Optimal (Nim-Sum) Agent

In [39]:
def optimal_strategy(state: Nim) -> Nimply:
    """Pick a move that leaves a nim-sum of 0, or a random move if none exists.

    The first move (in "brute_force" order) whose resulting nim-sum is 0 is
    returned. The random fallback is drawn only when no such move exists, so
    the random generator is left untouched whenever a zero-nim-sum move is
    available.

    Returns:
        The chosen move as a ``Nimply`` (a tuple subclass, so it still
        compares equal to, and unpacks like, a plain ``(row, num_objects)``).
    """
    candidates = cook_status(state)["brute_force"]
    for move, resulting_nim_sum in candidates:
        if resulting_nim_sum == 0:
            return Nimply(*move)
    fallback_move, _ = random.choice(candidates)
    return Nimply(*fallback_move)

### Pure random Agent

In [40]:
def pure_random(state: Nim) -> Nimply:
    """Return a random legal move.

    A row is drawn among the rows that still hold objects, then a number of
    objects between 1 and the size of that row (inclusive) is drawn.
    """
    candidate_rows = []
    for index, count in enumerate(state.rows):
        if count > 0:
            candidate_rows.append(index)
    chosen_row = random.choice(candidate_rows)
    taken = random.randint(1, state.rows[chosen_row])
    return Nimply(chosen_row, taken)

## MinMax Agent

In [41]:
def eval_state(state: Nim, p1_turn):
    """Score ``state`` as a terminal position.

    Returns:
        0 while ``state`` is truthy (the game is still running);
        -1 when the board is empty and ``p1_turn`` is truthy (P1 loses);
        1 when the board is empty and ``p1_turn`` is falsy (P1 wins).
    """
    if state:
        return 0
    return -1 if p1_turn else 1

In [42]:
def minmax(state: Nim, maximizing):
    """Exhaustively search the game tree below ``state``.

    ``maximizing`` is forwarded unchanged to ``eval_state`` and to every
    recursive call; the switch of perspective between the two players is
    obtained by negating the value returned for each child position.

    Returns:
        ``(None, value)`` when ``eval_state`` reports a finished game,
        otherwise ``(ply, value)`` where ``ply`` is the first move with the
        highest negated child value (``maximizing`` truthy) or with the lowest
        one (``maximizing`` falsy).
    """
    outcome = eval_state(state, maximizing)
    if outcome != 0:
        return None, outcome

    scored_moves = []
    for move in cook_status_for_mM(state)["possible_moves"]:
        child = deepcopy(state)
        child.nimming(move)
        _, child_value = minmax(child, maximizing)
        scored_moves.append((move, -child_value))

    # Both modes score the moves identically; they only differ in which end
    # of the scale is selected.
    select = max if maximizing else min
    return select(scored_moves, key=lambda scored: scored[1])

### Alpha-Beta Pruning

In [43]:
def minmax_alpha_beta(state: Nim, maximizing, alpha=-1 , beta=1):
    """Search the game tree below ``state`` with alpha-beta cut-offs.

    ``maximizing`` is forwarded unchanged to ``eval_state`` and to every
    recursive call; the switch of perspective between the two players is
    obtained by negating the value returned for each child position.

    Args:
        state: position to analyse; it is not modified (children are copies).
        maximizing: truthy to pick the highest negated child value, falsy to
            pick the lowest one.
        alpha: lower bound of the search window.
        beta: upper bound of the search window.

    Returns:
        ``(None, value)`` when ``eval_state`` reports a finished game,
        otherwise ``(best_ply, best)``. The scan of the moves stops as soon
        as ``beta <= alpha``.
    """
    val = eval_state(state, maximizing)
    if val != 0:
        return None, val
    possible = cook_status_for_mM(state)["possible_moves"]
    best_ply = ()
    if maximizing:
        best = -1
        for ply in possible:
            temp = deepcopy(state)
            temp.nimming(ply)
            _, val = minmax_alpha_beta(temp, maximizing, alpha, beta)
            if best <= -val:
                best_ply = ply
                best = -val
            alpha = max(alpha, best)
            if beta <= alpha:  # pruning
                break
    else:
        best = 1
        for ply in possible:
            temp = deepcopy(state)
            temp.nimming(ply)
            _, val = minmax_alpha_beta(temp, maximizing, alpha, beta)
            if best >= -val:
                best_ply = ply
                best = -val
            # The minimizing side tightens the upper bound. Lowering alpha
            # here (as the code used to do) can never make beta <= alpha
            # true, so this branch never pruned anything.
            beta = min(beta, best)
            if beta <= alpha:  # pruning
                break
    return best_ply, best

### Evaluation

In [44]:
# Play NUM_MATCHES matches: P1 uses the alpha-beta search, P2 the nim-sum strategy.
p1_wins = 0
first_move_p1 = 0

for m in range(NUM_MATCHES):
    x = Nim(NIM_SIZE)
    # The players alternate the first move: P1 opens the even-numbered matches.
    p1_turn = m % 2 == 0
    if p1_turn:
        first_move_p1 += 1
    while x:
        if p1_turn:
            # Alternative agent for P1: ply, _ = minmax(x, p1_turn)
            ply, _ = minmax_alpha_beta(x, p1_turn)
        else:
            # Alternative agent for P2: ply = pure_random(x)
            ply = optimal_strategy(x)
        x.nimming(ply)
        p1_turn = not p1_turn
    # The turn flag is flipped after every move, so it is False here exactly
    # when P1 made the last move, which counts as a win for P1.
    if not p1_turn:
        p1_wins += 1

print(
    "After",
    NUM_MATCHES,
    "matches:\nP1 (minmax) won ",
    p1_wins,
    " matches (starting first ",
    first_move_p1,
    " times!).\nP2 (with optimal) won ",
    NUM_MATCHES - p1_wins,
    " matches (starting first ",
    NUM_MATCHES - first_move_p1,
    " times!).",
)
    
        

After 100 matches:
P1 (minmax) won  50  matches (starting first  50  times!).
P2 (with optimal) won  50  matches (starting first  50  times!).
