In [1]:
import numpy as np
import random
from datetime import datetime
import time
from collections import defaultdict
from helper_func import *
import pickle
import multiprocessing
from multiprocessing import Pool



%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
class pentago:
    """
    State holder for one game of Pentago on a 6x6 board.

    The object tracks the board array, the move history, whose turn it is and
    the outcome. Placing a marble and rotating a quadrant is delegated to
    `fullmove`, and history keys come from `boardstate_to_ideal_key`; both are
    expected to be provided by `helper_func` (star-imported by the notebook).

    Players are identified as 1 and -1; player 1 moves first.
    """

    def __init__(self, state = None):
        """Start a new game.

        Parameters
        ----------
        state : array-like or None
            Optional starting board. When omitted an empty 6x6 integer board is
            created. A supplied board is copied so the caller's array is not
            aliased by the game.
        """
        # `is None` is required here: `state == None` is evaluated element-wise
        # when `state` is a NumPy array and cannot be used in an `if`.
        if state is None:
            state = np.zeros((6,6), dtype=int)
        else:
            state = np.array(state)
        self.state = state
        self.history = []
        self.winner = None
        self.gameover = False
        self.player_turn = 1

    def current_board_state(self):
        """Return a copy of the board so callers cannot mutate the live state."""
        return self.state.copy()

    def game_history(self, player, move, cuad, rotatation):
        """Append `(board_key, player, move, cuad, rotatation)` to the history."""
        self.history.append((boardstate_to_ideal_key(self.state), player, move, cuad, rotatation))

    def find_winner(self, board_state):
        """Check `board_state` for five in a row and record the outcome.

        Every length-5 window of a 6x6 board is collected (rows, columns, both
        main diagonals and the four length-5 offset diagonals) and summed. A
        sum of 5 means player 1 has five in a row, -5 means player -1 does.

        When a winner is found `self.gameover` is set, `self.winner` is set
        (player 1 takes precedence if both players have five in a row) and the
        winner is appended as the last element of `self.history`.

        Returns
        -------
        str
            Always the string "Win", regardless of the outcome. Inspect
            `self.winner` / `self.gameover` for the actual result.
        """
        flipped = np.fliplr(board_state)
        diagonal1 = board_state.diagonal()
        diagonal2 = flipped.diagonal()
        winning_slices = np.vstack([
            board_state[1:,:].T, board_state[:-1,:].T,  # all columns
            board_state[:,1:], board_state[:,:-1],      # all rows
            diagonal1[1:], diagonal1[:-1],              # diagonal 1
            diagonal2[1:], diagonal2[:-1],              # diagonal 2 (both windows)
            board_state.diagonal(offset=1), board_state.diagonal(offset=-1),  # diagonal offsets
            flipped.diagonal(offset=1), flipped.diagonal(offset=-1),          # anti-diagonal offsets
        ])
        sums = winning_slices.sum(axis=1)
        player1_win = 5 in sums
        player_min1_win = -5 in sums
        if player1_win or player_min1_win:
            self.gameover = True
            self.winner = 1 if player1_win else -1
            self.history.append(self.winner)
        return "Win"

    def check_gameover(self):
        """Mark the game as over when no empty (0) cell is left on the board."""
        if 0 not in self.state:
            self.gameover = True

    def full_move(self, move, cuad, direction, player, dtype=int):
        """Play one complete move (placement plus quadrant rotation).

        Parameters
        ----------
        move, cuad, direction
            Forwarded unchanged to `fullmove` together with `player`.
        player : int
            Player making the move; must equal `self.player_turn`.
        dtype
            Unused; kept for backward compatibility.

        Returns
        -------
        The updated board, or the string 'Error, wrong player turn.' when it is
        not `player`'s turn (in which case nothing is changed).
        """
        if player != self.player_turn:
            print( "error, wrong player turn. No move taken.")
            return 'Error, wrong player turn.'
        self.state = fullmove(self.state,move, cuad, direction, player)

        # Argument order must follow game_history(player, move, cuad, rotatation).
        self.game_history(player, move, cuad, direction)
        self.find_winner(self.state)
        self.check_gameover()
        self.player_turn = -1 if player == 1 else 1
        return self.state



In [4]:
class q_table:
    """Dictionary-backed value table keyed by board state.

    Each entry of `q_dict` maps a board-state key to a pair
    `(q_value, visit_count)`. Unknown states read as `(0, 0)`.
    """

    def __init__(self,length=0, games_played=0):
        """Create an empty table and stamp it with the creation time."""
        self.time = datetime.now()
        self.length = length
        self.q_dict = {}
        self.games_played = games_played

    def length(self):
        """Increment the `length` counter by one.

        NOTE: `__init__` stores an integer attribute with the same name, which
        shadows this method on instances, so it is only reachable as
        `q_table.length(instance)`.
        """
        self.length += 1

    def get_q_value(self, boardstate):
        """Return the stored `(q_value, visit_count)` pair, or `(0, 0)` if unseen."""
        return self.q_dict.get(boardstate, (0, 0))

    def update_q_value(self, boardstate, new_val, update_function = None):
        """Update the entry for `boardstate` with the sample `new_val`.

        With `update_function` the entry becomes
        `update_function(q_value, visit_count, new_val)`. Without it the entry
        is overwritten with `[new_val, visit_count + 1]`.

        Returns the new entry.
        """
        q_val, n = self.get_q_value(boardstate)
        if update_function:
            self.q_dict[boardstate] = update_function(q_val, n, new_val)
        else:
            self.q_dict[boardstate] = [new_val, n+1]
        return self.q_dict[boardstate]

    def update_post_game(self, history, update_fn):
        """Credit every recorded position of a finished game with its outcome.

        `history` must end with the numeric outcome; all preceding entries are
        sequences whose first element is the board-state key. An empty history
        is a no-op.
        """
        self._propagate_outcome(history, update_fn, None)

    def update_post_game2(self, history, update_fn, decay_reward = .9):
        """Like `update_post_game`, but the reward is multiplied by
        `decay_reward` after each position, walking from the last move
        backwards, so earlier positions receive a smaller share.
        """
        self._propagate_outcome(history, update_fn, decay_reward)

    def _propagate_outcome(self, history, update_fn, decay_reward):
        """Walk the history backwards and apply the (optionally decayed) reward."""
        if len(history) == 0:
            # Nothing was played, so there is no outcome to propagate.
            return
        reward = history[-1]
        # history[-2::-1] skips the trailing outcome and visits moves last-to-first.
        for boardposition in history[-2::-1]:
            self.update_q_value(boardposition[0], reward, update_fn)
            if decay_reward is not None:
                reward *= decay_reward

    
    

In [5]:
def my_func(q, n, nn):
    """Running-mean update.

    Treats `q` as the mean of `n` earlier samples, folds in the new sample
    `nn` and returns `(new_mean, n + 1)`.
    """
    visits = n + 1
    total = q*n + nn
    return total/visits, visits

In [6]:
def dampen_func(q, n, nn):
    """Dampened running-mean update.

    Same idea as a running mean, but the old value `q` is weighted as if it
    had been seen `n + 1` times, so a single new sample `nn` moves the
    estimate less. Returns `(new_value, n + 1)`.
    """
    weighted_total = q*(n+1) + nn
    return weighted_total/(n+2), n+1

In [7]:
%autoreload 2
class qtable_agent:
    """Epsilon-greedy player that ranks candidate positions with a Q-table.

    Parameters
    ----------
    player : int
        1 or -1; also used as a sign so that the -1 player maximises the
        negated Q-values.
    epsilon : float
        Probability of playing a random move instead of the greedy one.
    epsilon_decay : float
        Factor applied to `epsilon` after every call to `make_move`.
    epsilon_min : float
        Lower bound for `epsilon`.
    q_table : q_table or None
        Table to read values from. When omitted, a fresh table is created for
        this agent (agents never share a table implicitly).
    """

    def __init__(self, player = 1, epsilon = 1, epsilon_decay = .99995, epsilon_min = .5, q_table = None):
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        # A `q_table()` default in the signature would be built once and shared
        # by every agent created without an explicit table.
        self.q_table = self._new_q_table() if q_table is None else q_table
        self.player = player
        self.epsilon_min = epsilon_min

    @staticmethod
    def _new_q_table():
        """Build an empty table.

        Lives in its own function because the constructor parameter is also
        named `q_table` and hides the class inside `__init__`.
        """
        return q_table()

    def get_avail_moves(self,boardstate):
        """
        List every legal move for `boardstate`.

        A move is a tuple `(position, quadrant, rotation)` where `position` is
        the `(row, col)` of an empty (0) cell, `quadrant` is one of 1-4 and
        `rotation` is -1 or 1.
        """
        rows, cols = np.where(boardstate == 0)
        available_positions_for_placement = list(zip(rows, cols))

        # all available positions (p), quadrants(q), rotations(r)
        return [(p,q,r) for p in available_positions_for_placement for q in [1,2,3,4] for r in [-1,1]]

    def get_possible_next_boardstates(self, boardstate):
        """Group the available moves by the board key they lead to.

        Returns a `defaultdict(list)` mapping each resulting key (as produced
        by `boardstate_to_ideal_key`) to the list of moves that reach it.
        """
        next_possible_boardstates = defaultdict(list)
        for move in self.get_avail_moves(boardstate):
            possible_boardstate = fullmove(boardstate,*move, self.player)
            next_possible_boardstates[boardstate_to_ideal_key(possible_boardstate)].append(move)
        return next_possible_boardstates

    def make_move(self, game):
        """Choose a move epsilon-greedily, play it on `game`, then decay epsilon.

        With probability `epsilon` a random reachable position is chosen;
        otherwise one of the positions with the best Q-value (from this
        player's point of view) is chosen at random. The first recorded move
        leading to the chosen position is played.
        """
        # Work on a copy of the board handed out by the game object.
        boardstate = game.current_board_state()
        next_possible_boardstates = self.get_possible_next_boardstates(boardstate)
        key_list = list(next_possible_boardstates.keys())

        if np.random.rand() < self.epsilon:
            chosen_key = random.choice(key_list)
        else:
            # *player flips the q's for the -1 player so max() works for both sides
            q_values_list = [self.q_table.get_q_value(bs)[0]*self.player for bs in key_list]
            max_q = max(q_values_list)
            best_keys = [key for key, q in zip(key_list, q_values_list) if q == max_q]
            chosen_key = random.choice(best_keys)

        game.full_move(*next_possible_boardstates[chosen_key][0], self.player)

        # Clamp so epsilon never drops below the configured minimum.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [None]:
def little_sim(agents):
    """Play one full game between two agents and return the finished game.

    `agents` is a pair `(agent1, agent2)`; agent1 moves first and the agents
    alternate until the game reports `gameover`.
    """
    first_agent, second_agent = agents
    game = pentago()
    while True:
        for mover in (first_agent, second_agent):
            # Checked before every single move so the loop stops as soon as
            # either agent ends the game.
            if game.gameover:
                return game
            mover.make_move(game)
        
#t0 = time.time()
#if __name__ == '__main__':
#    with Pool(6) as p:
#        game_returns = p.map(little_sim, [(agent1,agent2)]*500)
#time.time() -t0

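# Rough timings, presumably from the Pool snippet above (pool size / number of games):
#   Pool(6), 500 games: 222 sec
#
#   Pool(6), 100 games: 79 sec
#   Pool(4), 100 games: 67 sec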

In [None]:
def little_sim(agents):
    """Simulate a single game: the two agents in `agents` take turns
    (first element starts) until the game is over; the game object is returned.
    """
    agent1, agent2 = agents
    board = pentago()
    to_move, waiting = agent1, agent2
    while not board.gameover:
        to_move.make_move(board)
        # Hand the turn to the other agent.
        to_move, waiting = waiting, to_move
    return board

In [44]:
# Winner recorded for the second game in `game_returns`.
# NOTE(review): at notebook scope `game_returns` is only assigned by the
# commented-out Pool snippet in an earlier cell, so this cell appears to
# depend on leftover kernel state - confirm before relying on it.
game_returns[1].winner

1

In [9]:
def big_sim_parallel(agent1, agent2, n_steps = 1, games_per_step = 500, qtables_to_update = None, parallel_threads = 6,  update_cadence = 1):
    """Run batches of self-play games and feed the results into Q-tables.

    For each of `n_steps` steps, `games_per_step` games between `agent1` and
    `agent2` are simulated with `little_sim`; every game that has a winner is
    then replayed into each table of `qtables_to_update` via
    `update_post_game2(..., dampen_func)`. After the last step each table is
    pickled to `decay_q_table<i>_<timestamp>.pickle` in the working directory.

    Parameters
    ----------
    agent1, agent2
        The two players; `agent1` moves first.
    n_steps : int
        Number of batches.
    games_per_step : int
        Games simulated per batch.
    qtables_to_update : list or None
        Tables to update and save; None means none.
    parallel_threads : int
        Size of the `multiprocessing.Pool` (worker processes).
    update_cadence
        Currently unused.

    Returns
    -------
    list of float
        Wall-clock seconds spent simulating each batch.

    Notes
    -----
    `Pool.map` ships its arguments to the worker processes, so changes the
    workers make to the agents (for example epsilon decay) are not expected to
    reach the agents held by the caller.
    """
    if qtables_to_update is None:
        qtables_to_update = []
    game_times = []

    for n in range(n_steps):
        print('game_step', n, end = ' ')
        game_start = time.time()
        matchups = [(agent1,agent2)]*games_per_step

        if __name__ == '__main__':
            with Pool(parallel_threads) as p:
                game_returns = p.map(little_sim, matchups)
        else:
            # Outside the main module no pool is started; play the batch
            # in-process so `game_returns` is always defined for this step
            # instead of being missing or stale from the previous one.
            game_returns = [little_sim(pair) for pair in matchups]

        game_times.append(time.time()-game_start)

        player1_winner = 0
        player2_winner = 0
        # check for winner and update q_table(s); games without a winner are skipped
        for game in game_returns:
            if not game.winner:
                continue
            if game.winner == 1:
                player1_winner += 1
            else:
                player2_winner += 1
            for q_tab in qtables_to_update:
                q_tab.update_post_game2(game.history, dampen_func)
        print("player 1 wins:", player1_winner)
        print("player 2 wins:", player2_winner)
        print("parallelized batch took", game_times[-1], "seconds.")

    # end of simulation runs, save q_table(s) to disk
    time_str = str(datetime.now())[:19].replace(':','_')
    for qt_num, q_tab in enumerate(qtables_to_update, start=1):
        with open(f'decay_q_table{qt_num}_'+time_str+'.pickle', 'wb') as file:
            pickle.dump(q_tab, file, protocol = pickle.HIGHEST_PROTOCOL)

    return game_times
            

In [10]:
# Note: re-running this cell overwrites `qtable1` and both agents. Verify you won't lose your data!
# Loads a previously saved Q-table from the working directory.
# SECURITY: pickle.load can execute arbitrary code - only load files you created yourself.
with open('decay_q_table1_2020-11-29 00_08_54.pickle', 'rb') as file:
    qtable1 =  pickle.load(file)
# Both agents are given the same table object, so they read the same values.
agent1 = qtable_agent(player = 1,  q_table=qtable1, epsilon_min = .05, epsilon = .5)
agent2 = qtable_agent(player = -1, q_table=qtable1, epsilon_min = .05, epsilon = .5)

In [None]:
##################################################
## Change number of games to simulate here
n_games = 10000        # games per call to big_sim_parallel (one saved q_table per call)
games_per_step = 500   # games per parallel batch
n_runs = 20            # number of calls; total games = n_runs * n_games
##################################################

# Number of batches needed to play at least n_games per call (rounded up).
n_steps = -(-n_games // games_per_step)

time0 = time.time()
for run_idx in range(n_runs):
    game_t = big_sim_parallel(agent1, agent2, n_steps=n_steps, games_per_step=games_per_step, qtables_to_update=[qtable1])
print(time.time()-time0, 'seconds.')

game_step 0 player 1 wins: 257
player 2 wins: 220
parallelized batch took 365.7337212562561 seconds.
game_step 1 player 1 wins: 285
player 2 wins: 192
parallelized batch took 257.35220193862915 seconds.
game_step 2 player 1 wins: 286
player 2 wins: 195
parallelized batch took 241.08361506462097 seconds.
game_step 3 player 1 wins: 272
player 2 wins: 209
parallelized batch took 247.9022159576416 seconds.
game_step 4 player 1 wins: 271
player 2 wins: 208
parallelized batch took 274.16999101638794 seconds.
game_step 5 player 1 wins: 274
player 2 wins: 200
parallelized batch took 243.872652053833 seconds.
game_step 6 player 1 wins: 276
player 2 wins: 203
parallelized batch took 252.4220209121704 seconds.
game_step 7 player 1 wins: 275
player 2 wins: 208
parallelized batch took 245.13045501708984 seconds.
game_step 8 player 1 wins: 288
player 2 wins: 186
parallelized batch took 266.3476507663727 seconds.
game_step 9 player 1 wins: 269
player 2 wins: 207
parallelized batch took 257.8820481300

In [23]:
# Convert a duration of 59738 seconds to hours
# (presumably the total run time of the training cell - TODO confirm).
59738/60/60


16.593888888888888

In [14]:
# Number of distinct board-state keys currently stored in the Q-table.
len(qtable1.q_dict)
#agent1.epsilon

3136962

# Scratch check: write a small list to 'test.pickle' and read it back to
# confirm the pickle round trip works. Creates/overwrites 'test.pickle' in the
# working directory.
x = [2,2,2,2,2]

with open('test.pickle', 'wb') as file:
    pickle.dump(x, file, protocol = pickle.HIGHEST_PROTOCOL)
    
with open('test.pickle', 'rb') as file:
    y = pickle.load(file)

print(y)

In [15]:
# Visit count (second element of each q_dict entry) for every stored state.
ns = [entry[1] for entry in qtable1.q_dict.values()]


In [16]:
# Distribution of visit counts over all stored states (default 10 equal-width bins).
np.histogram(np.array(ns))

(array([3136955,       6,       0,       0,       0,       0,       0,
              0,       0,       1]),
 array([1.000000e+00, 1.153710e+04, 2.307320e+04, 3.460930e+04,
        4.614540e+04, 5.768150e+04, 6.921760e+04, 8.075370e+04,
        9.228980e+04, 1.038259e+05, 1.153620e+05]))

In [23]:
# Number of states whose visit count is not exactly 1, then the number
# visited more than 2, 3, 4 and 10 times.
print(sum(1 for visits in ns if visits != 1))
print(sum(1 for visits in ns if visits > 2))
print(sum(1 for visits in ns if visits > 3))
print(sum(1 for visits in ns if visits > 4))
print(sum(1 for visits in ns if visits > 10))

94378
39381
27224
19200
6851


In [19]:
# Same histogram, restricted to states whose visit count is not exactly 1.
np.histogram([x for x in ns if x != 1])

(array([94371,     6,     0,     0,     0,     0,     0,     0,     0,
            1]),
 array([2.00000e+00, 1.15380e+04, 2.30740e+04, 3.46100e+04, 4.61460e+04,
        5.76820e+04, 6.92180e+04, 8.07540e+04, 9.22900e+04, 1.03826e+05,
        1.15362e+05]))