In [1]:
#load packages
import numpy as np
import random
import pickle
import time

In [25]:
# Hyper-parameters, starting player and board size used by the cells below.
learning_rate = 0.05      # alpha: step size of each Q-table update
decay_gamma = 0.7         # gamma: weight applied to the value term in the update
exp_rate = 0.3            # epsilon: probability that the agent plays a random turn while training
starting_player = 'dummy'
list_of_winners = []      # the game loops append the winner of every finished game here
policy = {}               # not referenced by any cell visible in this notebook
number_of_pits = 4        # playable pits (the target pit is added by the class)
number_of_stones = 4      # pebbles placed in every playable pit at the start

In [45]:
#create the game class
class Imkalah:
    def __init__(self, pits, pebbles):
        """Create a fresh game together with an empty Q-table.

        pits: number of playable pits; one target pit is appended after them.
        pebbles: number of pebbles every playable pit starts with.

        The starting player and the learning hyper-parameters are taken from the
        notebook-level variables starting_player, learning_rate, decay_gamma and
        exp_rate as they are when the instance is created."""
        # --- board ---
        self.pits = pits
        self.pebbles = pebbles
        self.board = [pebbles] * pits + [0]      # last entry is the target pit
        self.number_of_pits = len(self.board)    # note: includes the target pit (pits + 1)
        self.game_over = False
        self.states = []                         # boards recorded during the current game

        # --- players ---
        self.starting_player = starting_player
        self.player = starting_player

        # --- learning ---
        self.lr = learning_rate
        self.gamma = decay_gamma
        self.exp_rate = exp_rate
        self.Q_table = {}                        # str(board) -> learned value
    def choose_random_pit(self):
        """This function chooses a random pit.
        Returns an index for a non-empty game pit"""
        return random.choice(list(np.nonzero(self.board[:-1])[0]))
    def choose_smart_pit(self):
        """This function chooses the most optimal non-empty game pit according to the Q_table.
        Returns the index for the best choice of non-empty game pit"""
        current_board=self.board.copy()
        boards_to_evaluate_in_Q_table=[]
        possible_moves_1=list(np.nonzero(current_board[:-1])[0])
        boards_to_check=[]
        for pit_to_choose in possible_moves_1:
            turn, _, test_board=self.test_move(pit_to_choose, current_board.copy())
            if turn=='over':
                boards_to_evaluate_in_Q_table.append((pit_to_choose,test_board))
            else:
                boards_to_check.append(test_board.copy())
                while len(boards_to_check)>0:
                    current_board_1=boards_to_check.pop(0)
                    done=False
                    possible_moves=list(np.nonzero(current_board_1[:-1])[0])
                    for move in possible_moves:
                        turn, _, test_board=self.test_move(move, current_board_1.copy())
                        if turn=='over':
                            boards_to_evaluate_in_Q_table.append((pit_to_choose,test_board))
                        else:
                            boards_to_check.append(test_board.copy())
        value_max=-999
        best_index=None
        for next_board in boards_to_evaluate_in_Q_table:
            value = 0 if self.Q_table.get(str(next_board[1])) is None else self.Q_table.get(str(next_board[1]))
            if value >= value_max:
                value_max = value
                best_index=next_board[0]
        return best_index
    def count_moves_to_target_pit(self, board):
        """
        this function counts the number of moves which lands directly in the target pit
        returns an integer describing the number of possible moves which lands in the target pit
        """
        counter=0
        possible_moves=list(np.nonzero(board[:-1])[0])
        for i in possible_moves:
            if (board[i]%self.number_of_pits)==(self.number_of_pits-1-i):
                counter+=1
        return counter
    def choose_pit_strategy(self):
        """
        this function represents the target pit strategy
        returns the correct index according to the target pit strategy
        """
        current_board=self.board.copy()
        if (current_board[-1]==((self.pits*self.pebbles)-3)) and (current_board[-2]==1) and (current_board[-3]==2):
            return self.pits-1
        moves_directly_to_target=[]
        moves_indirectly_to_target=[]
        turn_over_moves=[]
        possible_moves=list(np.nonzero(current_board[:-1])[0])
        for i in possible_moves:
            if (current_board[i]%self.number_of_pits)==(self.number_of_pits-1-i):
                moves_directly_to_target.append(i)
            else:
                turn,_,_=self.test_move(i, current_board.copy())
                if turn=='ongoing':
                    moves_indirectly_to_target.append(i)
                else:
                    turn_over_moves.append(i)
        index_to_choose=None
        max_val=-1
        if len(moves_directly_to_target)>0:
            for i in moves_directly_to_target:
                if current_board[i]>max_val:
                    max_val=current_board[i]
                    index_to_choose=i
            return index_to_choose
        elif len(moves_indirectly_to_target)>0:
            for i in moves_indirectly_to_target:
                if current_board[i]>max_val:
                    max_val=current_board[i]
                    index_to_choose=i
            return index_to_choose
        else:
            for i in turn_over_moves:
                if current_board[i]>max_val:
                    max_val=current_board[i]
                    index_to_choose=i
            return index_to_choose
    def choose_pit_strategy2(self):
        """This functions does not care for direct vs indirect move to target pit
        this is an alteration of the target pit strategy
        returns the correct index according to the strategy
        """
        current_board=self.board.copy()
        if (current_board[-1]==((self.pits*self.pebbles)-3)) and (current_board[-2]==1) and (current_board[-3]==2):
            return self.pits-1
        moves_directly_to_target=[]
        moves_indirectly_to_target=[]
        turn_over_moves=[]
        possible_moves=list(np.nonzero(current_board[:-1])[0])
        for i in possible_moves:
            if (current_board[i]%self.number_of_pits)==(self.number_of_pits-1-i):
                moves_directly_to_target.append(i)
            else:
                turn,_,_=self.test_move(i, current_board.copy())
                if turn=='ongoing':
                    moves_indirectly_to_target.append(i)
                else:
                    turn_over_moves.append(i)
        index_to_choose=None
        max_val=-1
        moves_to_target=moves_directly_to_target+moves_indirectly_to_target
        if len(moves_to_target)>0:
            for i in moves_directly_to_target:
                if current_board[i]>max_val:
                    max_val=current_board[i]
                    index_to_choose=i
            return index_to_choose
        else:
            for i in turn_over_moves:
                if current_board[i]>max_val:
                    max_val=current_board[i]
                    index_to_choose=i
            return index_to_choose
    def possible_next_states(self, init_board):
        """This function finds all possible board states achieveable from a given state.
        Input is a board for which to find achieveable board states from
        Returns a list of board states achieveable from the input board."""
        if init_board[-1]==self.pebbles*self.pits:
            return None
        list_of_board_states=[]
        boards_to_check=[init_board.copy()]
        while len(boards_to_check)>0:
            current_board=boards_to_check.pop(0)
            possible_moves=list(np.nonzero(current_board[:-1])[0])
            for pit_to_choose in possible_moves:
                test_board=current_board.copy()
                turn, game_over, test_board = self.test_move(pit_to_choose, test_board)
                if turn=='over' or game_over==True:
                    list_of_board_states.append(test_board)
                else:
                    boards_to_check.append(test_board)
        return [list(x) for x in set(tuple(subliste) for subliste in list_of_board_states)]     
    def test_move(self, pit_index, test_board):
        """This function is for testing the outcome of choosing a certain non-empty game pit from a certain game state
        Input is the index of the non-empty game pit and the board state to test from.
        Returns:
            turn: is either ongoing or over, depending on whether the move ends the turn or requires another move
            game_over: is true if the move results in all pebbles being in the target pit.
            test_board: the board state after the move is done"""
        turn='ongoing'
        #print('TESTBOARD', test_board, 'INDEX', pit_index)
        pit_to_choose=pit_index
        game_over=False
        while turn=='ongoing' and game_over==False:
            pebbles_on_hand=test_board[pit_to_choose]
            #print('PEBBLES ON HAND', pebbles_on_hand)
            test_board[pit_to_choose]=0
            for i in range(pebbles_on_hand):
                pit_to_add_to=(pit_to_choose+1+i)%(len(test_board))
                test_board[pit_to_add_to]+=1
                #print('board while performing move', test_board)
            if pit_to_add_to==len(test_board)-1: #if last stone is placed in j0
                if test_board[-1]==sum(test_board): #if it is the last stone in the game
                    game_over=True
                    turn='over'
                else:
                    return turn, game_over, test_board
            elif test_board[pit_to_add_to]==1:#if last stone is placed in empty pit
                turn='over'
            else:
                pit_to_choose=pit_to_add_to #last stone is not placed in empty pit, so continue
        return turn, game_over, test_board
    def perform_move(self, pit_index):
        """Function to perform a move to the actual game board.
        input: pit_index is the index(0-indexed) of the pit in the board which the player
                should take the pebbles from
        Returns whether or not the turn is over by performing the move."""
        turn='ongoing'
        pit_to_choose=pit_index
        while turn=='ongoing' and self.game_over==False:
            pebbles_on_hand=self.board[pit_to_choose]
            self.board[pit_to_choose]=0
            for i in range(pebbles_on_hand):
                pit_to_add_to=(pit_to_choose+1+i)%self.number_of_pits
                self.board[pit_to_add_to]+=1
            if pit_to_add_to==len(self.board)-1: #if last stone is placed in j0
                if self.board[-1]==self.pebbles*self.pits: #if it is the last stone in the game
                    self.game_over=True
                    turn='over'
                else:
                    return turn
            elif self.board[pit_to_add_to]==1:#if last stone is placed in empty pit
                turn='over'
            else:
                pit_to_choose=pit_to_add_to #last stone is not placed in empty pit, so continue
        return turn
    def change_player(self):
        """Function for changing current player.
        Returns None"""
        if self.player=='agent':
            self.player='dummy'
        else:
            self.player='agent'
    def reset(self):
        """Function for resetting the game.
        Returns None"""
        self.game_over=False
        winner=None
        self.player=starting_player
        self.j0=0
        self.board=[self.pebbles]*self.pits+[0]
        self.states=[]
    def feedReward(self, reward):
        """Function for updating the Q_table.
        Input is the reward/penalty to provide board states from a game.
        Returns None"""
        for st in reversed(self.states):  # goes through all saved board states of this game
            temp=-1000
            possible_next_st=self.possible_next_states(st)
            if possible_next_st is not None:
                for i in possible_next_st:
                    #if self.Q_table.get(str(i)) is not None:
                    if self.Q_table.get(str(i)) is not None and self.Q_table.get(str(i))>temp:
                        temp=self.Q_table.get(str(i))
                if self.Q_table.get(str(st)) is None:
                    self.Q_table[str(st)] = 0  # initialise a value for the state
                self.Q_table[str(st)] += self.lr * (reward -self.gamma*temp- self.Q_table[str(st)])
            else:
                if self.Q_table.get(str(st)) is None:  # if it's not already in the dictionary (of board states of ALL games)
                    self.Q_table[str(st)] = 0  # initialise a value for the state
                self.Q_table[str(st)] += self.lr * (
                        self.gamma * reward - self.Q_table[str(st)])  # update weight for each board state
    def training_game(self, rounds=1000):
        """Play training games of the agent against the random 'dummy' player.

        rounds: number of games to play.

        On each of its turns the agent plays randomly with probability exp_rate and
        otherwise follows choose_smart_pit; the decision is made once per turn. The
        board after each of the agent's moves is recorded in self.states. When a game
        ends, the winner (the player who made the last move) is appended to the
        notebook-level list_of_winners, the recorded boards are rewarded with 10 for
        an agent win or -1 otherwise, and the game is reset.
        Returns None"""
        for _ in range(rounds):
            while not self.game_over:
                agent_to_move = self.player != 'dummy'
                # the exploration draw only happens on the agent's turns
                if agent_to_move and np.random.uniform(0, 1) > self.exp_rate:
                    pick_pit = self.choose_smart_pit
                else:
                    pick_pit = self.choose_random_pit

                turn = 'ongoing'
                while turn == 'ongoing':
                    move = pick_pit()
                    turn = self.perform_move(move)
                    if agent_to_move:
                        self.states.append(self.board.copy())

                if not self.game_over:
                    self.change_player()
                    continue

                winner = self.player
                list_of_winners.append(winner)
                self.feedReward(10 if winner == 'agent' else -1)
                self.reset()
                break
    def testing_game(self, rounds=1000):
        """Play evaluation games: the agent always follows the Q-table, no learning.

        rounds: number of games to play.

        The 'dummy' player moves randomly and the agent uses choose_smart_pit on every
        turn. A snapshot of the board after each of the agent's moves is recorded in
        self.states. When a game ends, the winner (the player who made the last move)
        is appended to the notebook-level list_of_winners and the game is reset. The
        Q-table is not updated.
        Returns None"""
        for _ in range(rounds):
            while not self.game_over:
                if self.player == 'dummy':
                    turn = 'ongoing'
                    while turn == 'ongoing':
                        move = self.choose_random_pit()
                        turn = self.perform_move(move)
                else:
                    turn = 'ongoing'
                    while turn == 'ongoing':
                        move = self.choose_smart_pit()
                        turn = self.perform_move(move)
                        # Store a copy: appending self.board itself would make every
                        # recorded entry alias the one live list that perform_move
                        # keeps mutating.
                        self.states.append(self.board.copy())
                if self.game_over:
                    winner = self.player
                    list_of_winners.append(winner)
                    self.reset()
                    break
                else:
                    self.change_player()
    def target_pit_strategy(self, rounds=1000):
        """Play games in which the agent follows the target pit strategy.

        rounds: number of games to play.

        The 'dummy' player moves randomly and the other player uses
        choose_pit_strategy. Every move of either player is recorded in self.states
        as (move, turn, board snapshot, player). When a game ends, the winner (the
        player who made the last move) is appended to the notebook-level
        list_of_winners and the game is reset.
        Returns None"""
        for _ in range(rounds):
            while not self.game_over:
                if self.player == 'dummy':
                    pick_pit = self.choose_random_pit
                else:
                    pick_pit = self.choose_pit_strategy

                turn = 'ongoing'
                while turn == 'ongoing':
                    move = pick_pit()
                    turn = self.perform_move(move)
                    self.states.append((move, turn, self.board.copy(), self.player))

                if not self.game_over:
                    self.change_player()
                    continue

                list_of_winners.append(self.player)
                self.reset()
                break
    def target_pit_strategy2(self, rounds=1000):
        """Play games in which the agent follows the alternative target pit strategy.

        rounds: number of games to play.

        Same game loop as target_pit_strategy, except that the non-'dummy' player
        picks its pits with choose_pit_strategy2, which does not separate direct from
        indirect moves to the target pit. Every move of either player is recorded in
        self.states as (move, turn, board snapshot, player). When a game ends, the
        winner is appended to the notebook-level list_of_winners and the game is reset.
        Returns None"""
        for _ in range(rounds):
            while not self.game_over:
                if self.player == 'dummy':
                    pick_pit = self.choose_random_pit
                else:
                    pick_pit = self.choose_pit_strategy2

                turn = 'ongoing'
                while turn == 'ongoing':
                    move = pick_pit()
                    turn = self.perform_move(move)
                    self.states.append((move, turn, self.board.copy(), self.player))

                if not self.game_over:
                    self.change_player()
                    continue

                list_of_winners.append(self.player)
                self.reset()
                break

In [46]:
# One game/agent instance, reused by all training and evaluation cells below.
play = Imkalah(pits=number_of_pits, pebbles=number_of_stones)

In [47]:
# Alternate a batch of training games with a test batch twice as large, and stop
# as soon as the agent wins every game of a test batch.
rounds = 1000
total_rounds = 0
list_of_winners = []
start_time = time.time()
while True:
    play.training_game(rounds)
    total_rounds += rounds

    # count winners of the test batch only
    list_of_winners = []
    play.testing_game(rounds * 2)
    agent_win_percentage = list_of_winners.count('agent') * 100 / (rounds * 2)
    print('agent wins', agent_win_percentage, 'percent of the test games, and the Q-table has', len(play.Q_table), 'entries after', total_rounds, 'training games.')
    if agent_win_percentage >= 100:
        break
end_time = time.time()
print('it took', round(end_time - start_time, 3), 'seconds to finish')

agent wins 99.75 percent of the test games, and the Q-table has 292 entries after 1000 training games.
agent wins 97.8 percent of the test games, and the Q-table has 329 entries after 2000 training games.
agent wins 99.8 percent of the test games, and the Q-table has 353 entries after 3000 training games.
agent wins 99.8 percent of the test games, and the Q-table has 366 entries after 4000 training games.
agent wins 99.3 percent of the test games, and the Q-table has 376 entries after 5000 training games.
agent wins 99.45 percent of the test games, and the Q-table has 381 entries after 6000 training games.
agent wins 100.0 percent of the test games, and the Q-table has 387 entries after 7000 training games.
it took 36.802 seconds to finish


In [28]:
# Evaluate the trained agent on a larger batch of test games.
list_of_winners = []
play.testing_game(5000)
agent_wins = list_of_winners.count('agent')
print('agent wins', agent_wins * 100 / 5000, 'percent of the test games', len(play.Q_table))

agent wins 100.0 percent of the test games 362


In [37]:
# Baseline: the hand-written target pit strategy against the random player.
rounds = 5000
list_of_winners = []
play.target_pit_strategy(rounds)
agent_wins = list_of_winners.count('agent')
print('agent wins', agent_wins * 100 / rounds, 'percent of the test games')

agent wins 74.66 percent of the test games


In [48]:

import ast

# List every learned board state with its Q-value, highest value first.
my_dict = play.Q_table
sorted_dict = sorted(my_dict.items(), key=lambda entry: entry[1], reverse=True)
top_keys = [board_key for board_key, _ in sorted_dict]

best_boards = top_keys
for board_key in best_boards:
    # the Q-table keys are already the string form of the board
    print(board_key, round(my_dict[board_key], 3))

[5, 0, 1, 0, 10] 13.589
[0, 0, 1, 0, 15] 11.83
[3, 0, 5, 1, 7] 8.977
[1, 0, 0, 0, 15] 8.788
[1, 1, 4, 3, 7] 7.858
[0, 5, 0, 1, 10] 7.696
[1, 1, 0, 1, 13] 7.559
[0, 5, 0, 4, 7] 7.497
[0, 0, 0, 0, 16] 7.0
[1, 5, 1, 0, 9] 6.876
[1, 0, 0, 1, 14] 6.783
[1, 0, 2, 1, 12] 6.457
[0, 2, 0, 1, 13] 6.132
[5, 1, 1, 0, 9] 6.118
[2, 1, 0, 2, 11] 6.092
[0, 0, 6, 1, 9] 5.939
[2, 1, 0, 0, 13] 5.753
[0, 1, 1, 9, 5] 5.747
[1, 1, 0, 0, 14] 5.475
[3, 0, 0, 1, 12] 5.415
[2, 0, 0, 1, 13] 5.206
[4, 0, 1, 3, 8] 5.008
[0, 9, 1, 0, 6] 4.968
[0, 4, 1, 0, 11] 4.965
[1, 1, 1, 4, 9] 4.889
[1, 1, 0, 2, 12] 4.647
[2, 1, 1, 0, 12] 4.639
[0, 4, 1, 1, 10] 4.574
[2, 1, 1, 1, 11] 4.558
[0, 1, 0, 3, 12] 4.535
[7, 2, 2, 0, 5] 4.519
[5, 0, 0, 0, 11] 4.509
[2, 0, 1, 2, 11] 4.464
[0, 6, 1, 1, 8] 4.45
[1, 1, 4, 1, 9] 4.431
[4, 1, 3, 2, 6] 4.43
[0, 2, 0, 2, 12] 4.418
[0, 0, 6, 0, 10] 4.337
[3, 3, 0, 2, 8] 4.276
[1, 0, 3, 0, 12] 4.265
[6, 0, 0, 7, 3] 4.264
[1, 0, 6, 0, 9] 4.252
[2, 1, 0, 1, 12] 4.235
[1, 1, 2, 1, 11] 4.233
[1, 6, 6