In [238]:
import logging
from pprint import pprint, pformat
from collections import namedtuple, defaultdict
import random
from copy import deepcopy
import numpy as np
import math

In [239]:
# A single move: take `num_objects` objects from the row at index `row`.
Nimply = namedtuple("Nimply", ["row", "num_objects"])

In [240]:
class Nim:
    """Board of a Nim game: a list of rows, each holding a number of objects.

    Row ``i`` starts with ``2 * i + 1`` objects, e.g. ``[1, 3, 5, 7, 9]`` for
    ``num_rows = 5``. ``k`` is the maximum number of objects that may be taken
    in a single move (``None`` means no limit).
    """

    def __init__(self, num_rows: int, k: int = None) -> None:
        self._rows = [i * 2 + 1 for i in range(num_rows)]
        self._k = k

    def __bool__(self):
        # The game is still running while at least one object is left.
        return sum(self._rows) > 0

    def __str__(self):
        return "<" + " ".join(str(_) for _ in self._rows) + ">"

    @property
    def rows(self) -> tuple:
        """Immutable snapshot of the current row sizes."""
        return tuple(self._rows)

    @rows.setter
    def rows(self, rows: list) -> None:
        # Copy into a fresh list: the board must stay mutable (tuples are
        # accepted too) and must not alias a list the caller keeps changing.
        self._rows = list(rows)

    def nimming(self, ply: "Nimply") -> None:
        """Apply the move ``ply = (row, num_objects)`` to the board in place.

        Raises AssertionError if the move is illegal: negative row index,
        fewer than one object, more objects than the row holds, or more than
        ``k`` objects when a cap is set. A row index past the end still raises
        IndexError.
        """
        row, num_objects = ply
        # A negative index would silently wrap around to the last rows.
        assert row >= 0, f"row index must be non-negative, got {row}"
        # Taking zero (or a negative number of) objects is not a move; a zero
        # move would leave the board unchanged and stall a game loop forever.
        assert num_objects >= 1, f"must take at least one object, got {num_objects}"
        assert self._rows[row] >= num_objects  # enough objects left in the row
        assert self._k is None or num_objects <= self._k  # respects the per-move cap
        self._rows[row] -= num_objects

In [241]:
def nim_sum(state: "Nim") -> int:
    """Return the nim-sum of ``state``: the bitwise XOR of all its row sizes.

    Works directly on Python integers, so it is not limited to 32-bit row
    sizes and returns 0 for a board without rows.
    """
    total = 0
    for count in state.rows:
        total ^= int(count)
    return total


def analize(raw: Nim) -> dict:
    """Map every legal move from ``raw`` to the nim-sum of the resulting board.

    Returns ``{"possible_moves": {Nimply: nim_sum_after_move, ...}}``. Each
    move is tried on a deep copy, so ``raw`` itself is never modified.
    """
    # Nim stores its per-move cap in `_k` and exposes no public accessor.
    # Moves above the cap are illegal and would trip the assertion in
    # `nimming`, so they must not be enumerated.
    max_take = getattr(raw, "_k", None)

    cooked = {"possible_moves": {}}
    for r, c in enumerate(raw.rows):
        limit = c if max_take is None else min(c, max_take)
        for o in range(1, limit + 1):
            ply = Nimply(r, o)
            tmp = deepcopy(raw)
            tmp.nimming(ply)
            cooked["possible_moves"][ply] = nim_sum(tmp)
    return cooked


def optimal(state: Nim) -> Nimply:
    """Pick a move that leaves a nim-sum of zero, or any legal move otherwise.

    When several moves qualify, one of them is drawn at random.
    """
    analysis = analize(state)
    logging.debug(f"analysis:\n{pformat(analysis)}")

    # Moves after which the opponent faces a zero nim-sum (normal-play Nim).
    candidates = []
    for move, resulting_sum in analysis["possible_moves"].items():
        if resulting_sum == 0:
            candidates.append(move)

    # No such move exists: fall back to the full list of possible moves.
    if len(candidates) == 0:
        candidates = list(analysis["possible_moves"])

    return random.choice(candidates)

In [242]:
def pure_random(state: Nim) -> Nimply:
    """Return a uniformly random legal move for ``state``.

    A random non-empty row is chosen first, then a random number of objects
    between 1 and the row size, limited by the board's per-move cap if it has
    one.
    """
    row = random.choice([r for r, c in enumerate(state.rows) if c > 0])
    # Nim stores its per-move cap in `_k` and exposes no public accessor;
    # without this limit the move could exceed the cap and be rejected.
    max_take = getattr(state, "_k", None)
    upper = state.rows[row] if max_take is None else min(state.rows[row], max_take)
    num_objects = random.randint(1, upper)
    return Nimply(row, num_objects)

In [243]:
def generate_states(nim: "Nim"):
    """Enumerate every non-empty board that fits inside ``nim``'s current rows.

    A board is any combination with ``0 <= board[i] <= nim.rows[i]``; the
    all-zero board (game over) is left out.

    Returns a tuple ``(possible_states, states_dict)``:
      * ``possible_states`` - list of boards (each a list of ints), ordered
        with row 0 varying fastest and the last row slowest;
      * ``states_dict`` - ``{tuple(board): index}`` giving the position of
        each board in ``possible_states`` for constant-time lookup.
    """
    row_sizes = nim.rows

    # Start from the empty board and extend it one row at a time. Every row,
    # including row 0, is handled the same way, so the first row may hold any
    # number of objects.
    possible_states = [[0] * len(row_sizes)]
    for row, matches in enumerate(row_sizes):
        new_permutations = []
        for i in range(1, matches + 1):  # every non-zero count for this row
            for state in possible_states:  # combined with every board built so far
                new_state = list(state)  # flat list of ints: a shallow copy is enough
                new_state[row] = i
                new_permutations.append(new_state)
        possible_states += new_permutations

    # Drop the all-zero board: nobody has to move from it.
    possible_states.pop(0)

    states_dict = {tuple(state): i for i, state in enumerate(possible_states)}

    return (possible_states, states_dict)

In [244]:
# A player: its id, the real-valued parameters it evolves, and the move table
# (one move per game state) derived from those parameters.
Agent = namedtuple("Agent", ["agent_id", "params", "strategy"])

In [245]:
def generate_rand_params(states: dict[tuple, int], num_rows: int):
    """Draw one random legal move for every game state.

    ``states`` maps each board (a tuple of row sizes) to its index. The result
    is a list with one entry per state, stored at that state's index, in the
    format ``[row, num_matches_to_take]``. A state without any non-empty row
    keeps ``None``. ``num_rows`` is unused and kept for interface
    compatibility.
    """
    # We need as many params as there are states in the game.
    rand_params = [None] * len(states)

    for state, i in states.items():
        # Indices of the rows that still hold matches. Enumerating positions
        # (rather than looking counts up with `state.index`) keeps rows with
        # equal counts distinct, so each non-empty row is equally likely.
        valid_rows = [r for r, count in enumerate(state) if count > 0]
        if not valid_rows:
            continue

        chosen_row = random.choice(valid_rows)
        # Take anywhere between one match and the whole row.
        matches_to_take = random.randint(1, state[chosen_row])

        rand_params[i] = [chosen_row, matches_to_take]

    return rand_params

In [246]:
def make_strategy(params: list):
    """Turn real-valued ``[row, num_matches]`` parameters into concrete moves.

    Both components are rounded to the nearest integer, giving one ``Nimply``
    per entry of ``params``, in the same order.
    """
    rounded = np.round(np.array(params))
    strategy = []
    for entry in rounded:
        strategy.append(Nimply(int(entry[0]), int(entry[1])))
    return strategy

In [247]:
def play_game(nim: Nim, agent_1: Agent, agent_2: Agent, states: dict[tuple, int]):
    """Play one game between two table-driven agents; ``agent_1`` moves first.

    Each agent looks its move up in its strategy table, using the index that
    ``states`` assigns to the current board. The game is played on a copy of
    ``nim``. Returns 0 if ``agent_1`` took the last object and 1 if
    ``agent_2`` did (normal-play Nim: the last mover wins).
    """
    strategies = (agent_1.strategy, agent_2.strategy)
    board = deepcopy(nim)
    to_move = 0
    while board:
        state_index = states[tuple(board.rows)]
        board.nimming(strategies[to_move][state_index])
        to_move = 1 - to_move
    # `to_move` was flipped after the final move, so flip it back.
    return 1 - to_move

In [248]:
def play_tournament(nim: Nim, states: dict[tuple, int], agents_dict: dict):
    """Round-robin tournament: every pair of agents plays twice.

    Each agent of a pair gets to move first once. Returns
    ``{agent_id: number_of_wins}`` for every agent in ``agents_dict``.
    """
    agent_ids = list(agents_dict.keys())
    num_wins = {agent_id: 0 for agent_id in agent_ids}

    for first_pos, first_id in enumerate(agent_ids):
        # Only pair with agents further down the list, so each pair occurs once.
        for second_id in agent_ids[first_pos + 1:]:
            pair = (first_id, second_id)
            agent_1 = agents_dict.get(first_id)
            agent_2 = agents_dict.get(second_id)

            # agent_1 starts: the result indexes straight into `pair`.
            home_winner = play_game(nim, agent_1, agent_2, states)
            num_wins[pair[home_winner]] += 1

            # agent_2 starts: seats are swapped, so the index is mirrored.
            away_winner = play_game(nim, agent_2, agent_1, states)
            num_wins[pair[1 - away_winner]] += 1

    return num_wins

In [249]:
def generate_agents(states: dict[tuple, int], num_rows: int, num_agents: int):
    """Create ``num_agents`` agents with random parameters and unique ids.

    Returns ``{agent_id: Agent}`` where every id is a distinct six-digit
    number.
    """
    agents = {}

    for _ in range(num_agents):
        params = generate_rand_params(states, num_rows)
        strategy = make_strategy(params)

        # Redraw until the id is not taken by an agent created earlier.
        agent_id = random.randint(10**5, 10**6-1)
        while agent_id in agents:
            agent_id = random.randint(10**5, 10**6-1)

        agents[agent_id] = Agent(agent_id, params, strategy)

    return agents

In [250]:
def pick_top_agents(agents: dict[int, Agent], leaderboard: dict, num_agents: int):
    """Return the ``num_agents`` agents with the most wins.

    ``leaderboard`` maps agent ids to win counts. The result maps the ids of
    the top performers to their ``Agent`` objects, best first.
    """
    ranking = sorted(leaderboard.items(), key=lambda entry: entry[1], reverse=True)
    return {agent_id: agents.get(agent_id) for agent_id, _wins in ranking[:num_agents]}

In [251]:
def mutate_param(states: list, state_idx: int, row: list, sigma: float):
    """Mutate one ``[row_number, num_matches_to_take]`` parameter pair.

    ``states[state_idx]`` is the board the pair belongs to and ``row`` is the
    parent's pair for it. Each component is, with probability 1/10, redrawn
    uniformly among the legal values; otherwise Gaussian noise with standard
    deviation ``sigma`` is added and the result is clamped so that it still
    rounds to a legal value.

    Returns ``[x, y]`` (possibly non-integer) such that ``round(x)`` is a
    non-empty row of the board and ``round(y)`` lies between 1 and the number
    of matches in that row.

    Raises ValueError if the board has no non-empty row.
    """
    state = states[state_idx]  # e.g. (1, 3, 3, 7, 9)

    # Positions of the rows that still hold matches. Enumerating positions
    # (rather than looking counts up with `state.index`) keeps rows with equal
    # counts distinct; otherwise the later one could never be selected.
    valid_rows = [r for r, count in enumerate(state) if count > 0]
    if not valid_rows:
        raise ValueError(f"state {state!r} has no row left to take matches from")

    # 10% chance of a completely random row, else perturb the parent's row.
    if random.randint(1, 10) == 10:
        x = random.choice(valid_rows)
    else:
        x = row[0] + np.random.normal(0, sigma)
        # Clamp so that rounding cannot leave the index range of the board.
        if x < -0.5:
            x = -0.5
        elif x >= (len(state) - 0.5):
            x = len(state) - 0.51
        # A perturbed value pointing at an empty row is replaced by a random
        # non-empty one.
        if int(round(x)) not in valid_rows:
            x = random.choice(valid_rows)

    num_matches_in_row = state[int(round(x))]
    assert num_matches_in_row > 0

    # 10% chance of a completely random count, else perturb the parent's count.
    if random.randint(1, 10) == 10:
        y = random.randint(1, num_matches_in_row)
    else:
        y = row[1] + np.random.normal(0, sigma)
        # Clamp so that the rounded value stays within 1..num_matches_in_row.
        # The upper bound is inclusive because a value of exactly n + 0.5 may
        # round up to n + 1 under round-half-to-even.
        if y <= 0.5:
            y = 0.51
        elif y >= num_matches_in_row + 0.5:
            y = num_matches_in_row + 0.49

    return [x, y]

In [252]:
def make_children(parents: dict[int, Agent], sigma: float, mu: int, lamb: int, states: list):
    """Create mutated offspring for every parent.

    Each parent produces ``(lamb - mu) // mu`` children. A child's parameters
    are the parent's parameters with every ``[row, num_matches]`` pair passed
    through ``mutate_param`` using standard deviation ``sigma``.

    Returns ``{child_id: Agent}``. Child ids are unique among both the
    parents and the other children.
    """
    children_per_parent = (lamb - mu) // mu

    children = {}
    for parent in parents.values():
        for _ in range(children_per_parent):
            # Redraw until the id is free. Checking the children created so
            # far as well as the parents matters: a duplicate id would
            # silently overwrite an earlier child and shrink the population.
            child_id = random.randint(10**5, 10**6-1)
            while child_id in parents or child_id in children:
                child_id = random.randint(10**5, 10**6-1)

            # parent.params[state_idx] is the pair for the state at state_idx.
            child_params = [
                mutate_param(states, state_idx, row, sigma)
                for state_idx, row in enumerate(parent.params)
            ]

            child_strategy = make_strategy(child_params)
            children[child_id] = Agent(child_id, child_params, child_strategy)

    return children
                            

In [253]:
def print_top3(leaderboard: dict):
    """Print the three entries of ``leaderboard`` with the most wins.

    ``leaderboard`` maps agent ids to win counts.
    """
    # ranking format: [(agent_id, num_wins), ...], best first
    ranking = sorted(leaderboard.items(), key=lambda entry: entry[1], reverse=True)

    print("\n----TOP 3 AGENTS----\n")
    for agent in ranking[:3]:
        print(f"Agent ({agent[0]}) -  {agent[1]} wins")
    print()

In [254]:
def make_optimal_strategy(num_rows: int, states_list: list):
    """Build a move table from ``optimal``: one move per state in ``states_list``.

    The table has the same shape as an agent's strategy, so it can be used
    with ``play_game``.
    """
    def board_for(state):
        # A fresh game whose rows are overwritten with the requested state.
        board = Nim(num_rows)
        board.rows = list(state)
        return board

    return [optimal(board_for(state)) for state in states_list]

In [255]:
def make_random_strategy(num_rows: int, states_list: list):
    """Build a move table from ``pure_random``: one move per state in ``states_list``.

    The move for each state is drawn once, when the table is built. The table
    has the same shape as an agent's strategy, so it can be used with
    ``play_game``.
    """
    def board_for(state):
        # A fresh game whose rows are overwritten with the requested state.
        board = Nim(num_rows)
        board.rows = list(state)
        return board

    return [pure_random(board_for(state)) for state in states_list]

In [256]:
def check_performance(nim: Nim, top_agents: dict[int, Agent], optimal_strategy: list, states_dict: dict):
    """Run a tournament among ``top_agents`` plus a reference agent and print the top 3.

    The reference agent (id 666) plays ``optimal_strategy``. ``top_agents`` is
    copied first, so the caller's dictionary is left untouched.
    """
    reference = Agent(666, [], optimal_strategy)

    contenders = deepcopy(top_agents)
    contenders[reference.agent_id] = reference

    print_top3(play_tournament(nim, states_dict, contenders))

In [257]:
# --- set-up: game, state table and initial random population ---
pop_size = 100
num_rows = 5
game = Nim(num_rows)
(states_list, states_dict) = generate_states(game)
initial_agents = generate_agents(states_dict, num_rows, num_agents=pop_size)

# --- evolution parameters ---
num_of_evolutions = 500
n = 1  # generation counter, starts at 1
sigma = 1  # mutation strength, adapted every generation
num_parents = 20

# fixed move tables used as reference opponents in the evaluation cells
optimal_strategy = make_optimal_strategy(num_rows, states_list)
random_strategy = make_random_strategy(num_rows, states_list)

population = initial_agents

# agent_id -> (agent, number of times the agent was among the selected parents)
hall_of_fame = defaultdict(lambda: (None,0))

for i in range(num_of_evolutions):
    # self-adaptive sigma: multiply by a log-normal factor whose spread
    # shrinks as 1 / sqrt(n) with the generation counter
    lr = 1 / math.sqrt(n)
    sigma = sigma * math.exp(lr*np.random.normal(0, 1))

    # play every agent against each other and collect the number of wins
    win_stats = play_tournament(nim=game, states=states_dict, agents_dict=population)

    # pick the top `num_parents` agents that become the parents
    best = pick_top_agents(population, win_stats, num_parents)

    # a little experiment:
    # since it seems some agents can be unlucky and drop out of the selected
    # parents, keep a hall of fame of every agent that was selected and how
    # many times, but only start counting half way through the run
    if (n > num_of_evolutions / 2):
        for top_agent in best.values():
            agent_id = top_agent.agent_id
            hall_of_fame[agent_id] = (top_agent, hall_of_fame[agent_id][1] + 1)

    # optional diagnostic: tournament among the parents plus the nim-sum
    # optimal agent, printing the top 3 of each generation
    # check_performance(game, best, optimal_strategy, states_dict)

    # make children from the parents using the adapted mutation strength
    # (previously a literal 1.0 was passed, so the adapted sigma was never used),
    # then add the parents themselves back to the population
    population = make_children(best, sigma, mu=num_parents, lamb=pop_size, states=states_list)
    population.update(best)

    n += 1

In [258]:
# Reduce every hall-of-fame entry (agent, appearances) to its appearance count.
stats = {agent_id: entry[1] for agent_id, entry in hall_of_fame.items()}

# Rank the agents by number of appearances, most frequent first, and keep ten.
sorted_hof = sorted(stats.items(), key=lambda item: item[1], reverse=True)
top10 = sorted_hof[:10]

print(top10)

[(324725, 78), (466180, 66), (252710, 63), (538128, 58), (810320, 57), (990660, 50), (773019, 43), (757887, 38), (682669, 37), (353745, 36)]


In [259]:
# Reference opponents built from the fixed move tables.
optimal_agent = Agent(666, [], optimal_strategy)
random_agent = Agent(333, [], random_strategy)

# Strategy of the agent with the most hall-of-fame appearances.
best_agent_strategy = hall_of_fame[top10[0][0]][0].strategy
optimal_agent_strategy = optimal_agent.strategy

# Show the first 20 states side by side; small games have fewer than 20
# states, so stop at the end of the table instead of indexing past it.
for i in range(min(20, len(states_list))):
    print(states_list[i])
    print("best agent: ", best_agent_strategy[i])
    print("optimal agent: ", optimal_agent_strategy[i])
    print()


[1, 0, 0, 0, 0]
best agent:  Nimply(row=0, num_objects=1)
optimal agent:  Nimply(row=0, num_objects=1)

[0, 1, 0, 0, 0]
best agent:  Nimply(row=1, num_objects=1)
optimal agent:  Nimply(row=1, num_objects=1)

[1, 1, 0, 0, 0]
best agent:  Nimply(row=0, num_objects=1)
optimal agent:  Nimply(row=0, num_objects=1)

[0, 2, 0, 0, 0]
best agent:  Nimply(row=1, num_objects=2)
optimal agent:  Nimply(row=1, num_objects=2)

[1, 2, 0, 0, 0]
best agent:  Nimply(row=0, num_objects=1)
optimal agent:  Nimply(row=1, num_objects=1)

[0, 3, 0, 0, 0]
best agent:  Nimply(row=1, num_objects=1)
optimal agent:  Nimply(row=1, num_objects=3)

[1, 3, 0, 0, 0]
best agent:  Nimply(row=0, num_objects=1)
optimal agent:  Nimply(row=1, num_objects=2)

[0, 0, 1, 0, 0]
best agent:  Nimply(row=2, num_objects=1)
optimal agent:  Nimply(row=2, num_objects=1)

[1, 0, 1, 0, 0]
best agent:  Nimply(row=0, num_objects=1)
optimal agent:  Nimply(row=2, num_objects=1)

[0, 1, 1, 0, 0]
best agent:  Nimply(row=1, num_objects=1)
optima

In [260]:
# Hall-of-fame agents against the nim-sum optimal agent, one game per seat.
# play_game returns the seat of the winner: 0 = first mover, 1 = second mover.
wins = 0
for agent_id, num_wins in top10:
    best_agent = hall_of_fame[agent_id][0]

    # evolved agent moves first
    result_home = play_game(game, best_agent, optimal_agent, states_dict)
    if result_home != 0:
        print(f"agent ({agent_id}) vs optimal: optimal")
    else:
        print(f"agent ({agent_id}) vs optimal: agent")
        wins += 1

    # evolved agent moves second
    result_away = play_game(game, optimal_agent, best_agent, states_dict)
    if result_away != 1:
        print(f"optimal vs agent ({agent_id}): optimal")
    else:
        print(f"optimal vs agent ({agent_id}): agent")
        wins += 1

# two games were played per agent
win_rate = wins / (2 * len(top10))

print(f"Win rate: {win_rate * 100} %")


agent (324725) vs optimal: optimal
optimal vs agent (324725): optimal
agent (466180) vs optimal: optimal
optimal vs agent (466180): optimal
agent (252710) vs optimal: optimal
optimal vs agent (252710): optimal
agent (538128) vs optimal: optimal
optimal vs agent (538128): optimal
agent (810320) vs optimal: optimal
optimal vs agent (810320): optimal
agent (990660) vs optimal: optimal
optimal vs agent (990660): optimal
agent (773019) vs optimal: optimal
optimal vs agent (773019): optimal
agent (757887) vs optimal: optimal
optimal vs agent (757887): optimal
agent (682669) vs optimal: optimal
optimal vs agent (682669): optimal
agent (353745) vs optimal: optimal
optimal vs agent (353745): optimal
Win rate: 0.0 %


In [261]:
# Initial (unevolved) agents against the random agent, one game per seat.
# play_game returns the seat of the winner: 0 = first mover, 1 = second mover.
print("-----Initial agents against random-----")

wins = 0
for agent_id, agent in initial_agents.items():

    # agent moves first
    result_home = play_game(game, agent, random_agent, states_dict)
    if result_home != 0:
        print(f"agent ({agent_id}) vs random: random")
    else:
        print(f"agent ({agent_id}) vs random: agent")
        wins += 1

    # agent moves second
    result_away = play_game(game, random_agent, agent, states_dict)
    if result_away != 1:
        print(f"random vs agent ({agent_id}): random")
    else:
        print(f"random vs agent ({agent_id}): agent")
        wins += 1

# two games were played per agent
win_rate = wins / (2 * len(initial_agents))

print(f"Win rate: {win_rate * 100} %")

-----Initial agents against random-----
agent (454694) vs random: random
random vs agent (454694): random
agent (584121) vs random: agent
random vs agent (584121): agent
agent (948975) vs random: agent
random vs agent (948975): random
agent (219001) vs random: agent
random vs agent (219001): agent
agent (560010) vs random: agent
random vs agent (560010): random
agent (935589) vs random: agent
random vs agent (935589): random
agent (267057) vs random: random
random vs agent (267057): random
agent (170813) vs random: random
random vs agent (170813): random
agent (927682) vs random: random
random vs agent (927682): random
agent (944749) vs random: agent
random vs agent (944749): agent
agent (503041) vs random: random
random vs agent (503041): agent
agent (375780) vs random: agent
random vs agent (375780): random
agent (433669) vs random: random
random vs agent (433669): agent
agent (258738) vs random: random
random vs agent (258738): agent
agent (995436) vs random: agent
random vs agent (

In [262]:
# Final population against the random agent, one game per seat.
# play_game returns the seat of the winner: 0 = first mover, 1 = second mover.
print("-----Final population against random-----")

wins = 0
for agent_id, agent in population.items():

    # agent moves first
    result_home = play_game(game, agent, random_agent, states_dict)
    if result_home != 0:
        print(f"agent ({agent_id}) vs random: random")
    else:
        print(f"agent ({agent_id}) vs random: agent")
        wins += 1

    # agent moves second
    result_away = play_game(game, random_agent, agent, states_dict)
    if result_away != 1:
        print(f"random vs agent ({agent_id}): random")
    else:
        print(f"random vs agent ({agent_id}): agent")
        wins += 1

# two games were played per agent
win_rate = wins / (2 * len(population))

print(f"Win rate: {win_rate * 100} %")

-----Final population against random-----
agent (983230) vs random: random
random vs agent (983230): random
agent (224559) vs random: agent
random vs agent (224559): agent
agent (261436) vs random: random
random vs agent (261436): random
agent (341439) vs random: agent
random vs agent (341439): random
agent (396208) vs random: agent
random vs agent (396208): random
agent (156301) vs random: agent
random vs agent (156301): random
agent (180207) vs random: agent
random vs agent (180207): agent
agent (366906) vs random: agent
random vs agent (366906): random
agent (636193) vs random: random
random vs agent (636193): random
agent (506999) vs random: agent
random vs agent (506999): agent
agent (499896) vs random: agent
random vs agent (499896): agent
agent (521100) vs random: agent
random vs agent (521100): agent
agent (226432) vs random: random
random vs agent (226432): agent
agent (774294) vs random: agent
random vs agent (774294): agent
agent (126661) vs random: agent
random vs agent (12

In [263]:
# Hall-of-fame agents against the random agent, one game per seat.
# play_game returns the seat of the winner: 0 = first mover, 1 = second mover.
wins = 0
print("-----Hall of fame against random-----")
for agent_id, num_wins in top10:
    best_agent = hall_of_fame[agent_id][0]

    # evolved agent moves first
    result_home = play_game(game, best_agent, random_agent, states_dict)
    if result_home != 0:
        print(f"agent ({agent_id}) vs random: random")
    else:
        print(f"agent ({agent_id}) vs random: agent")
        wins += 1

    # evolved agent moves second
    result_away = play_game(game, random_agent, best_agent, states_dict)
    if result_away != 1:
        print(f"random vs agent ({agent_id}): random")
    else:
        print(f"random vs agent ({agent_id}): agent")
        wins += 1

# two games were played per agent
win_rate = wins / (2 * len(top10))

print(f"Win rate: {win_rate * 100} %")


-----Hall of fame against random-----
agent (324725) vs random: agent
random vs agent (324725): agent
agent (466180) vs random: agent
random vs agent (466180): agent
agent (252710) vs random: agent
random vs agent (252710): random
agent (538128) vs random: random
random vs agent (538128): agent
agent (810320) vs random: agent
random vs agent (810320): agent
agent (990660) vs random: agent
random vs agent (990660): agent
agent (773019) vs random: agent
random vs agent (773019): agent
agent (757887) vs random: random
random vs agent (757887): agent
agent (682669) vs random: agent
random vs agent (682669): agent
agent (353745) vs random: agent
random vs agent (353745): agent
Win rate: 85.0 %
