In [11]:
import numpy as np
from mygrad import sliding_window_view
from collections import Counter
from time import time

In [12]:
# Cell values for the two players; an empty cell is 0 (boards start as zeros).
# four_in_row sums four cells and compares the sum with RED * 4 and
# YELLOW * 4, so the two values must stay far enough apart that a mixed line
# can never reach either target.
RED, YELLOW = 1, 10

In [3]:

# Statistics tables for the playouts.  run_MCTS_sim keys both of them by
# (color, str(board)): `plays` counts simulations that went through a
# position, `wins` counts those that the position's color went on to win.
plays, wins = Counter(), Counter()

In [13]:
def possible_fours(board):
    """Collect every line of four cells on `board`.

    Returns a tuple ``(horizontal, vertical, diagonal, anti_diagonal)`` of
    arrays whose rows each hold the four cell values of one candidate line.

    NOTE(review): this relies on mygrad's sliding_window_view putting the
    window dimensions last, so that the reshapes below yield one window per
    leading index -- confirm if the mygrad version changes.
    """
    def main_diagonals(grid):
        # One 4x4 window per position; moving the window index to the last
        # axis lets .diagonal() pull the main diagonal out of every window.
        windows = sliding_window_view(grid, (4, 4), 1).reshape(-1, 4, 4)
        return windows.transpose(1, 2, 0).diagonal()

    horizontal = sliding_window_view(board, (1, 4), 1).reshape(-1, 4)
    # Rows of the transposed board are the columns of the original board.
    vertical = sliding_window_view(board.T, (1, 4), 1).reshape(-1, 4)
    # Mirroring the columns turns anti-diagonals into main diagonals.
    return horizontal, vertical, main_diagonals(board), main_diagonals(board[:, ::-1])

def four_in_row(board):
    """Report the state of the game on `board`.

    Returns RED or YELLOW when some line of four cells sums to four pieces of
    that color, -1 when there is no such line and the board contains no 0
    (empty) cell, and 0 otherwise.
    """
    red_target = 4 * RED
    yellow_target = 4 * YELLOW
    # Directions are scanned in the order possible_fours returns them, and
    # RED is tested before YELLOW within each direction.
    for lines in possible_fours(board):
        line_sums = np.sum(lines, axis=1)
        if np.any(line_sums == red_target):
            return RED
        if np.any(line_sums == yellow_target):
            return YELLOW
    if 0 in board:
        return 0
    return -1
def valid_move(move,column_heights):
    """Return how many playable columns equal `move`.

    The result is truthy exactly when `move` is one of the column indices
    reported by possible_moves for `column_heights`.
    """
    open_columns = possible_moves(column_heights)
    is_requested = open_columns == move
    return open_columns[is_requested].size
def possible_moves(column_heights):
    """Return the indices of the columns that hold fewer than 6 pieces.

    `column_heights` is expected to be a 1-D numpy array of per-column piece
    counts; the result is a 1-D integer array in ascending order.
    """
    has_room = column_heights < 6
    return np.flatnonzero(has_room)
def apply_move(move,color,board,column_heights):
    """Drop a `color` piece into column `move`, updating both arrays in place.

    The piece lands in row ``5 - column_heights[move]`` (row 5 is filled
    first), and the column's height is then incremented.  An AssertionError
    is raised when the column already holds 6 pieces.  Returns the same
    `board` object that was passed in.
    """
    filled = column_heights[move]
    assert filled < 6

    landing_row = 5 - filled
    board[landing_row, move] = color
    column_heights[move] += 1
    return board

def next_boards(color,board,column_heights):
    """Return ``str(board)`` for every position `color` can reach in one move.

    Entries follow the order of possible_moves(column_heights).  Each move is
    applied to copies, so `board` and `column_heights` are left untouched.
    """
    return [
        str(apply_move(column, color, board.copy(), column_heights.copy()))
        for column in possible_moves(column_heights)
    ]
def change_color(color):
    """Return the color that moves next: RED after YELLOW, YELLOW otherwise."""
    if color == YELLOW:
        return RED
    return YELLOW

In [14]:
def get_MCTS_move(board,columm_heights,color,calc_time=3,depth=10,plays=None,wins=None,stats=True):
    """Choose a column for `color` by running random playouts for `calc_time` seconds.

    Parameters
    ----------
    board : array with the current position; it is copied for every
        simulation and never modified here.
    columm_heights : per-column piece counts matching `board` (also copied).
    color : the player to move (RED or YELLOW).
    calc_time : wall-clock budget, in seconds, for running simulations.
    depth : accepted for backward compatibility; not used by this function.
    plays, wins : Counter-like tables keyed by ``(color, str(board))`` that
        run_MCTS_sim updates in place.  Fresh Counters are created when they
        are omitted, so nothing is remembered between calls in that case.
    stats : when true, print the simulation count, chosen move and win rate.

    Returns
    -------
    The chosen column index; the single legal column when only one is left;
    or None when no column is playable.
    """
    moves = possible_moves(columm_heights)
    if moves.size == 0:
        return
    if moves.size == 1:
        return moves[0]
    if plays is None:
        plays = Counter()
    if wins is None:
        wins = Counter()

    games = 0
    begin = time()
    while time() - begin < calc_time:
        # Simulations mutate the arrays they are given, so hand them copies.
        run_MCTS_sim(board.copy(),columm_heights.copy(),color,plays,wins)
        games += 1

    # Rank each legal move by wins / plays of the position it leads to.
    # Positions without statistics count as 0 wins out of 1 play.  An entry
    # recorded with 0 plays (e.g. left behind by an interrupted simulation)
    # is treated the same way instead of raising ZeroDivisionError.
    # Ties on win rate are broken in favour of the larger column index.
    winrate, move = max(
            (wins.get((color, S), 0) /
             (plays.get((color, S), 0) or 1),
             p)
            for p, S in zip(moves,next_boards(color,board,columm_heights)))
    if stats:
        print("simulations:{} move:{} winrate:{}".format(games,move,winrate))
    return move

In [15]:
def run_MCTS_sim(board,column_heights,color,plays,wins):
    """Play one random game from the given position and record its outcome.

    `board` and `column_heights` are modified in place, so callers pass
    copies.  Moves are drawn uniformly from the playable columns, starting
    with `color` and alternating.  At most two positions that are not yet in
    `plays` are added to the tables per simulation.  Afterwards every visited
    position that is present in `plays` gets one more play, and one more win
    when its color equals the result of four_in_row.

    Returns the (mutated) `plays` and `wins` tables.
    """
    seen = set()
    # 42 minus the pieces already placed bounds the remaining moves
    # (presumably the 6 x 7 board used elsewhere in this notebook).
    moves_left = 42 - np.sum(column_heights)
    win = 0
    expansions_left = 2
    for _ in np.arange(moves_left):
        move = np.random.choice(possible_moves(column_heights))
        apply_move(move,color,board,column_heights)

        # A position is identified by who just moved and the resulting board.
        position = (color, str(board))
        if expansions_left and position not in plays:
            expansions_left -= 1
            plays[position] = 0
            wins[position] = 0
        seen.add(position)

        color = change_color(color)

        win = four_in_row(board)
        if win:
            break

    # Back-propagate the result to the tracked positions only.
    for position in seen:
        if position in plays:
            plays[position] += 1
            if position[0] == win:
                wins[position] += 1
    return plays, wins
            
        

In [21]:
def training(plays=None,wins=None):
    """Play one silent computer-vs-computer game to grow the statistics.

    `plays` and `wins` are forwarded to get_MCTS_move, which updates them in
    place; when they are None, get_MCTS_move creates fresh tables on every
    call, so nothing accumulates.  RED moves first.  When the game ends the
    result of four_in_row is printed (RED, YELLOW, or -1 for a full board).
    """
    # The builtin `int` replaces the `np.int` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same type.
    board = np.zeros((6,7),dtype=int)
    column_heights = np.zeros(7,dtype=int)
    color = RED
    while not four_in_row(board):
        move = get_MCTS_move(board,column_heights,color,plays=plays,wins=wins,stats=False)
        apply_move(move,color,board,column_heights)
        color = change_color(color)
    print("winner: ", four_in_row(board))

In [22]:
def computer_vs_computer(plays=None,wins=None):
    """Play one computer-vs-computer game, printing the board after each move.

    `plays` and `wins` are forwarded to get_MCTS_move, which updates them in
    place (and prints its statistics for every move).  RED moves first.  When
    the game ends the result of four_in_row is printed (RED, YELLOW, or -1
    for a full board).
    """
    # The builtin `int` replaces the `np.int` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same type.
    board = np.zeros((6,7),dtype=int)
    column_heights = np.zeros(7,dtype=int)
    color = RED
    print(board)
    while not four_in_row(board):
        move = get_MCTS_move(board,column_heights,color,plays=plays,wins=wins)
        apply_move(move,color,board,column_heights)
        color = change_color(color)
        print(board)
    print("winner: ", four_in_row(board))

In [23]:
def _read_human_move(column_heights):
    """Prompt until the user enters a playable 1-based column; return it 0-based."""
    while True:
        raw = input("Enter column:")
        try:
            move = int(raw) - 1
        except ValueError:
            print("Please enter a whole number.")
            continue
        # valid_move only matches indices of columns that still have room, so
        # it also rejects 0 or negative entries (which would otherwise wrap
        # around as numpy indices) and columns beyond the board.
        if valid_move(move, column_heights):
            return move
        print("That column is not playable; choose a column that is not full.")


def human_vs_computer(plays=None,wins=None):
    """Play an interactive game: the human is RED and moves first.

    The human types a 1-based column number and is asked again until the
    entry names a playable column.  YELLOW's moves come from get_MCTS_move,
    which receives `plays` and `wins` and updates them in place.  The board
    is printed after every move, and the result of four_in_row is printed
    when the game ends (RED, YELLOW, or -1 for a full board).
    """
    # The builtin `int` replaces the `np.int` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; both name the same type.
    board = np.zeros((6,7),dtype=int)
    column_heights = np.zeros(7,dtype=int)
    color = RED
    print(board)
    while not four_in_row(board):
        if color == RED:
            move = _read_human_move(column_heights)
        else:
            move = get_MCTS_move(board,column_heights,color,plays=plays,wins=wins)
        apply_move(move,color,board,column_heights)
        color = change_color(color)
        print(board)
    print("winner: ", four_in_row(board))

In [24]:
import pickle

# Resume from the statistics saved by an earlier session when both files
# exist; on a first run (no files yet) start from empty tables instead of
# failing.  Both tables are reset together so they never get out of step.
# NOTE: pickle.load can execute arbitrary code while unpickling -- only load
# files that this notebook wrote itself.
try:
    with open('plays.pkl',mode='rb') as f:
        loaded_plays = pickle.load(f)
    with open('wins.pkl',mode='rb') as f:
        loaded_wins = pickle.load(f)
except FileNotFoundError:
    plays, wins = Counter(), Counter()
else:
    plays, wins = loaded_plays, loaded_wins

# Each self-play game updates `plays` and `wins` in place.
for i in range(10):
    training(plays,wins)

# Persist the updated statistics for the next session.
with open('plays.pkl',mode='wb') as f:
    pickle.dump(plays,f)
with open('wins.pkl',mode='wb') as f:
    pickle.dump(wins,f)
    

winner:  1
winner:  10
winner:  10
winner:  10
winner:  1
winner:  10
winner:  10
winner:  10
winner:  10
winner:  10


In [None]:
import pickle

# Write the current statistics tables to disk, plays first and then wins.
for filename, table in (('plays.pkl', plays), ('wins.pkl', wins)):
    with open(filename, mode='wb') as f:
        pickle.dump(table, f)