## Some Set Up

In [None]:
from functools import reduce, partial
import operator
from math import sqrt, factorial
import numpy as np
from random import randint, random
from copy import deepcopy
import time
import asyncio

Set up some constants - this should work for all games with two players and uniform pieces (tic-tac-toe, reversi, etc.).

In [None]:
# Cell values stored in the flat board list. X and O have opposite signs so
# that winner() can detect a completed line by summing its cells.
EMPTY = 0
X = 1
O = -1

# Caption text for each cell value; draw_widget_board() uses it for button labels.
out_map = {EMPTY: '', X: 'X', O: 'O'}


In [None]:
def init_board(size):
    """Return a fresh flat board: a list of ``size`` cells, each set to ``EMPTY``."""
    return [EMPTY] * size
init_board(9)

In [None]:
def legal_moves(board):
    """Return the indices of all cells in ``board`` that are still ``EMPTY``.

    The indices are returned as a list in ascending order.
    """
    open_cells = []
    for index, cell in enumerate(board):
        if cell == EMPTY:
            open_cells.append(index)
    return open_cells
legal_moves(init_board(9))

In [None]:
def to_2d(board):
    """Reshape a flat square board into a ``side x side`` numpy array.

    ``side`` is the integer square root of ``len(board)``; numpy raises
    ``ValueError`` when the board length is not a perfect square.
    """
    side = int(sqrt(len(board)))
    return np.asarray(board).reshape(side, side)
to_2d(init_board(9))

In [None]:
def has_moves(board, _=None):
    """Return True when ``board`` still has at least one legal move.

    The second positional argument is accepted and ignored.
    """
    return bool(legal_moves(board))
has_moves(init_board(9))

In [None]:
def winner(board):
    """Return the winner of a flat square board.

    A line (row, column or either diagonal) is won when the sum of its cells
    reaches ``+side`` or ``-side``. Lines are checked in the order rows,
    columns, main diagonal, anti-diagonal, and the first completed line
    decides the result.

    Args:
        board: flat sequence of cell values whose length is a perfect square.

    Returns:
        A plain ``int``: 1 if the +1 player owns a full line, -1 if the -1
        player does, 0 if nobody has won.

    Raises:
        ValueError: if ``len(board)`` is not a perfect square.
    """
    side = int(sqrt(len(board)))
    if side * side != len(board):
        raise ValueError('board length %d is not a perfect square' % len(board))
    if side == 0:
        return 0

    # Every line is expressed as flat indices, so no reshaping is needed.
    rows = [range(r * side, (r + 1) * side) for r in range(side)]
    cols = [range(c, side * side, side) for c in range(side)]
    main_diag = [r * (side + 1) for r in range(side)]
    anti_diag = [(r + 1) * (side - 1) for r in range(side)]

    for line in rows + cols + [main_diag, anti_diag]:
        total = sum(board[i] for i in line)
        if total >= side or total <= -side:
            # int() keeps the return type uniform even for numpy-backed boards.
            return int(total // side)
    return 0

winner(init_board(9))

## Random Player and Game Play

In [None]:
def random_player(player, board, lms=None, prt=False):
    """Place ``player`` on a uniformly random legal cell of ``board``.

    Args:
        player: cell value to write (X or O).
        board: flat board list; it is modified in place.
        lms: optional precomputed list of legal moves; computed from the
            board when omitted.
        prt: when true, print the board as a 2-D grid after the move.

    Returns:
        The same ``board`` object, after the move.
    """
    candidates = legal_moves(board) if lms is None else lms
    # randint is inclusive on both ends, hence the -1.
    board[candidates[randint(0, len(candidates) - 1)]] = player
    if prt:
        print(to_2d(board))
    return board
b = random_player(X, init_board(9), None, True)
b = random_player(O, b, None, True)
b = random_player(X, b, None, True)
b = random_player(O, b, None, True)
b = random_player(X, b, None, True)

In [None]:
def play(xs, os, save_game=None, size=9):
    """Play one game between two move functions and return the outcome.

    Args:
        xs: move function for X, called as ``xs(board, lms=legal_moves)`` and
            expected to return the board after its move.
        os: move function for O, same calling convention.
        save_game: optional list; a copy of the board is appended before each
            move and once more for the final position.
        size: number of cells on the board (default 9, i.e. 3x3).

    Returns:
        The non-zero value reported by ``winner`` as soon as a move wins the
        game, or 0 when the board fills up without a winner.
    """
    b = init_board(size)
    turn = X
    result = 0

    lms = legal_moves(b)
    while lms:
        if save_game is not None:
            save_game.append(b.copy())
        mover = xs if turn > 0 else os
        b = mover(b, lms=lms)
        result = winner(b)
        if result:
            break
        turn *= -1
        lms = legal_moves(b)

    # Single exit: the final position is recorded for wins and draws alike.
    if save_game is not None:
        save_game.append(b.copy())
    return result

saved_game=[]
xs = partial(random_player, X)
os = partial(random_player, O)
play(xs, os, saved_game)

In [None]:
def print_game(g):
    """Print every saved board in ``g`` as a 2-D grid, in order."""
    for snapshot in g:
        grid = to_2d(snapshot)
        print(grid)

print_game(saved_game)

In [None]:
def simulate(num, xs, os):
    """Play ``num`` games and return the outcome percentages.

    Returns a dict keyed by X, O and 0 (draw) whose values are the share of
    games with that outcome, expressed in percent.
    """
    tally = dict.fromkeys((X, O, 0), 0)
    for _ in range(num):
        outcome = play(xs, os)
        tally[outcome] = tally[outcome] + 1

    percentages = {}
    for outcome, count in tally.items():
        percentages[outcome] = (count / num) * 100
    return percentages
simulate(10000, xs, os)

In [None]:
def pretty_print_board(board):
    """Print ``board`` as a labelled grid for interactive play.

    A fixed column header ``123`` is printed first, then one line per row
    prefixed with its 1-based row number; X, O and empty cells are shown as
    ``X``, ``O`` and ``.``.
    """
    grid = to_2d(board)
    glyphs = {1: 'X', -1: 'O', 0: '.'}
    print('  123')
    for row_number, row in enumerate(grid, start=1):
        print(f'{row_number} ', end='')
        for cell in row:
            print(glyphs[cell], end='')
        print('')
pretty_print_board(init_board(9))

## Interactive Player

In [None]:
def interactive_player(player, board, lms=None, prt=False):
    """Ask the user for a move on stdin and apply it to ``board``.

    The move is typed as two digits, row then column, both 1-based
    (e.g. ``23`` is row 2, column 3).

    Args:
        player: cell value to write (X or O).
        board: flat square board list; modified in place.
        lms: optional precomputed list of legal moves.
        prt: unused; present so all players share one signature.

    Returns:
        The same ``board`` object, after the move.

    Raises:
        ValueError: with message 'Illegal Move' when the input is malformed,
            names a row/column outside the board, or targets an occupied cell.
    """
    pretty_print_board(board)
    print("Move: ", end='')
    move = input()

    side = int(sqrt(len(board)))
    try:
        row, col = int(move[0]), int(move[1])
    except (IndexError, ValueError):
        raise ValueError('Illegal Move') from None
    # Range-check both coordinates: without this, a column past the edge
    # would silently land on a cell of the following row.
    if not (1 <= row <= side and 1 <= col <= side):
        raise ValueError('Illegal Move')
    pos = (row - 1) * side + (col - 1)

    if lms is None:
        lms = legal_moves(board)
    if pos not in lms:
        raise ValueError('Illegal Move')
    board[pos] = player
    pretty_print_board(board)
    return board

In [None]:
ixs = partial(interactive_player, X)

In [None]:
play(ixs, os)

## Widget Player

In [None]:
import ipywidgets as ws
from IPython.display import display
from traitlets import Unicode


In [None]:
%gui asyncio

In [None]:
# Build a size x size grid of toggle buttons, one per board cell.
size = 3
# All buttons start disabled.
buttons = [ws.ToggleButton(value=False, description='', disabled=True,
                     layout=ws.Layout(width='34px', height='34px'),
                     sync=True) 
           for i in range(size**2)]
# Lay the flat button list out row by row: one HBox per board row.
hboxes = []
for i in range(size):
    hbox = []
    for j in range(size):
        hbox.append(buttons[size*i+j])
        # Tag each button with its flat board index so a change handler can
        # tell which cell was clicked (read as change['owner'].i).
        buttons[size*i+j].i = size*i+j
        #buttons[size*i+j].observe(partial(on_button_clicked, size*i+j), names='value')
        
    hboxes.append(ws.HBox(hbox))

vbox = ws.VBox(hboxes)
# Read-only integer field holding the index of the last clicked button; -1 is the initial value.
clicked_value = ws.IntText(value=-1, disabled=True)
# Output widget used as a print target (via `with output:`).
output = ws.Output()

In [None]:
def set_value_on_button_click(change):
    """Observer: copy the ``i`` tag of the button that changed into ``clicked_value``."""
    source = change['owner']
    clicked_value.value = source.i

for b in buttons:
    b.observe(set_value_on_button_click, 'value')

In [None]:
def draw_widget_board(board, buttons):
    """Set each button's caption to the ``out_map`` text for the matching board cell."""
    for index, cell in enumerate(board):
        buttons[index].description = out_map[cell]
        

In [None]:
display(vbox, clicked_value, output)

In [None]:
clicked_value.value = -1
buttons[1].disabled = False

In [None]:
draw_widget_board(init_board(9), buttons)
draw_widget_board([0,1,1,-1,0,0,0,1,0], buttons)

In [None]:
def wait_for_change(widget, value):
    """Return a Future that resolves on the next change of ``widget``'s trait ``value``.

    A one-shot observer is registered on the widget. When it fires, the
    future's result is set to the new trait value and the observer removes
    itself.
    """
    pending = asyncio.Future()

    def _on_change(change):
        # Hand the new value to whoever awaits the future, then detach.
        pending.set_result(change.new)
        widget.unobserve(_on_change, value)

    widget.observe(_on_change, value)
    return pending


In [None]:
async def widget_player(player, buttons,  board, lms=None, prt=False):
    """Let the user make a move by clicking one of the board buttons.

    Args:
        player: cell value to write (X or O).
        buttons: flat list of toggle buttons, one per board cell.
        board: flat board list; modified in place.
        lms: optional precomputed list of legal moves.
        prt: unused; present so all players share one signature.

    Returns:
        The same ``board`` object, after the clicked move has been applied.
    """
    draw_widget_board(board, buttons)

    if lms is None:
        lms = legal_moves(board)

    # Forget the previous click before listening: the observer only fires when
    # the value actually changes, so a stale index equal to the next click
    # would otherwise leave this coroutine waiting forever.
    clicked_value.value = -1

    # enable the legal moves
    for i in lms:
        buttons[i].disabled = False

    pos = await wait_for_change(clicked_value, 'value')
    with output:
        print('here', clicked_value.value)

    board[pos] = player
    # Lock the board again and release the toggled button for the next turn.
    for button in buttons:
        button.disabled = True
        button.value = False
    draw_widget_board(board, buttons)
    return board

In [None]:
async def _resolve_move(result):
    """Return ``result``, awaiting it first when the player returned an awaitable."""
    import inspect  # local import: the notebook's import cell does not provide it

    if inspect.isawaitable(result):
        return await result
    return result


async def async_play(xs, os, save_game=None):
    """Coroutine version of ``play`` that supports asynchronous players.

    Either player may be a plain function or a coroutine function; both are
    called as ``fn(board, lms=legal_moves)`` and must yield the board after
    the move. Progress is traced into the ``output`` widget.

    Args:
        xs: move function for X.
        os: move function for O.
        save_game: optional list; a copy of the board is appended before each
            move and once more for the final position.

    Returns:
        The non-zero value reported by ``winner`` once a move wins the game,
        or 0 when no legal moves remain.
    """
    b = init_board(9)
    turn = X

    lms = legal_moves(b)
    with output:
        print('outside loop', lms)
    while len(lms) > 0:
        with output:
            print('turn', turn)
        if save_game is not None:
            save_game.append(b.copy())
        if turn > 0:
            ret = await _resolve_move(xs(b, lms=lms))
            with output:
                print('return', ret)
            b = ret
        else:
            b = await _resolve_move(os(b, lms=lms))
        w = winner(b)
        if w:
            if save_game is not None:
                save_game.append(b.copy())
            return w
        turn *= -1
        lms = legal_moves(b)

    if save_game is not None:
        save_game.append(b.copy())
    return 0

In [None]:
output = ws.Output()
wpx = partial(widget_player, X, buttons)
x = asyncio.ensure_future(async_play(wpx, mm_os))
display(vbox, clicked_value, output)

In [None]:
x

## MinMax

In [None]:
def build_tree(board, caches, factor=1, level=1):
    """Recursively expand every continuation of ``board`` and record best moves.

    Each node is a list ``[factor, level*-1, move, value, children]``:
    ``move`` is the cell index played to reach the node (None for a terminal
    node), ``value`` is the backed-up result, and ``children`` is the list of
    nodes returned by the recursive call (for a terminal node this slot holds
    the result value instead).

    Args:
        board: flat board list; never modified (each move is tried on a copy).
        caches: dict with keys 1 and -1, each mapping ``tuple(board)`` to the
            list of best moves for that position; filled in as a side effect.
        factor: cell value written for the side to move; negated on each ply.
        level: 1 when the best child is the maximum, -1 when it is the
            minimum; negated on each ply. With the defaults it always equals
            ``factor`` (presumably 1 is X's turn - confirm against callers).

    Returns:
        The list of child nodes of ``board``, or a single-element list holding
        a terminal node when the game is already decided or the board is full.
    """
    w = winner(board)
    if w:
        # Game already won: terminal node carrying the winner as its value.
        return [[factor, level*-1, None, w, w]]
    
    lms = legal_moves(board)
    if len(lms) == 0:
        # Full board without a winner: terminal node with value 0.
        return [[factor, level*-1, None, 0, 0]]
    
    trees = []
    for m in lms:
        bc = board.copy()
        bc[m] = factor
        trees.append([factor, level*-1, m, None, build_tree(bc, caches, factor * -1, level*-1)])
        # Back up the child's value from its own children, which belong to the
        # other side: max of them when level is -1, min of them otherwise.
        if level == -1:
            trees[-1][3] = max([sub_tree[3] for sub_tree in trees[-1][4]])
        else:
            trees[-1][3] = min([sub_tree[3] for sub_tree in trees[-1][4]])
    
    # Record every move that achieves the best value for the side to move.
    if level == 1:
        mx = max([sub_tree[3] for sub_tree in trees])
        caches[1][tuple(board)] = [sub_tree[2] for sub_tree in trees if sub_tree[3] == mx]
    else:
        mx = min([sub_tree[3] for sub_tree in trees])
        caches[-1][tuple(board)] = [sub_tree[2] for sub_tree in trees if sub_tree[3] == mx]
    return trees

In [None]:
min_max_caches = {1:{}, -1:{}}
build_tree(init_board(9), min_max_caches)

In [None]:
def min_max_player(player, caches, board, lms=None, prt=False):
    """Play one of the precomputed best moves for ``player`` on ``board``.

    Args:
        player: cell value to write; also selects ``caches[player]``.
        caches: dict mapping player to a dict from ``tuple(board)`` to the
            list of best move indices for that position.
        board: flat board list; modified in place.
        lms: unused; present so all players share one signature.
        prt: when true, print the board as a 2-D grid after the move.

    Returns:
        The same ``board`` object, after the move.

    Raises:
        KeyError: if the position is not present in the cache.
    """
    candidates = caches[player][tuple(board)]
    # Break ties between equally good moves at random.
    chosen = candidates[randint(0, len(candidates) - 1)]
    board[chosen] = player
    if prt:
        print(to_2d(board))
    return board
mm_xs = partial(min_max_player, X, min_max_caches)
mm_os = partial(min_max_player, O, min_max_caches)

In [None]:
simulate(10000, mm_xs, os), simulate(10000, xs, mm_os), simulate(10000, mm_xs, mm_os)

In [None]:
def q_table_player(player, q_table, episilon, board, lms=None, prt=False):
    """Epsilon-greedy player driven by a table of action values.

    Args:
        player: cell value to write (X or O).
        q_table: dict mapping ``tuple(board)`` to a dict ``{move: value}``.
            Unseen positions are added with every legal move valued 0.0.
        episilon: probability of playing a random move instead of the
            best-valued one; 0.0 disables random moves.
        board: flat board list; modified in place.
        lms: optional precomputed legal moves, forwarded to the random player.
        prt: when true, print the board as a 2-D grid after the move.

    Returns:
        The same ``board`` object, after the move.
    """
    state = tuple(board)
    actions = q_table.get(state)
    if actions is None:
        # Only scan the board for legal moves the first time a state is seen.
        actions = q_table[state] = {lm: 0.0 for lm in legal_moves(board)}

    if random() > (1 - episilon):  # set epsilon to 0.0 for no random moves
        return random_player(player, board, lms, prt)

    max_action = max(actions.values())
    best_actions = [m for m, v in actions.items() if v == max_action]
    # Break ties between equally valued moves at random.
    board[best_actions[randint(0, len(best_actions) - 1)]] = player
    if prt:
        print(to_2d(board))
    return board
    


In [None]:
q_table = {}

In [None]:


def train_q_table(n=1, gamma=0.95, alpha=0.2, training_partner=None, epsilon=0.3):
    """Train the module-level ``q_table`` for X by playing ``n`` games.

    After every decisive game the learner's moves are replayed from last to
    first and each visited (state, move) value is pulled towards the
    discounted final result.

    Args:
        n: number of training games.
        gamma: discount applied to the result for each earlier learner move.
        alpha: learning rate of the incremental update.
        training_partner: move function playing O; defaults to the random
            player.
        epsilon: exploration rate passed to ``q_table_player``.
    """
    learner = partial(q_table_player, X, q_table, epsilon)
    if training_partner is None:
        training_partner = partial(random_player, O)

    for episode in range(n):
        save = []
        r = play(learner, training_partner, save)
        if r:
            # save[-1] is the final board. If O made the last move, the
            # learner's last move is one snapshot further back.
            start = -2 if r == O else -1
            reward = float(r)
            for i in range(start, -1 * len(save), -2):
                before, after = save[i - 1], save[i]
                # The learner's move is the single cell that went from 0 to X.
                move = [x - y for x, y in zip(after, before)].index(X)
                actions = q_table[tuple(before)]
                # Incremental update: keep (1 - alpha) of the old estimate
                # and move alpha of the way towards the discounted reward.
                actions[move] = (1 - alpha) * actions[move] + alpha * reward
                reward *= gamma
        if episode % 10000 == 0:
            print('.', end='')
        if episode % 100000 == 0:
            print()
            
train_q_table(500000)

In [None]:
qt_xs = partial(q_table_player, 1, q_table, 0.0)
play(qt_xs, os)

In [None]:
simulate(10000, qt_xs, mm_os)

## Play Async

In [None]:
import ipywidgets as ws
from IPython.display import display
from traitlets import Unicode
import asyncio

In [None]:
%gui asyncio

In [None]:
button1 = ws.ToggleButton(description='click')
button1.i = 1
button2 = ws.ToggleButton(description='click')
button2.i = 2
output = ws.Output()
click_value = ws.IntText(value=-1, disabled=True)

def button_change(change):
    """Observer: copy the ``i`` tag of the button that changed into ``click_value``."""
    source = change['owner']
    click_value.value = source.i

button1.observe(button_change, 'value')
button2.observe(button_change, 'value')


In [None]:
display(button1, button2, click_value, output)

In [None]:
def wait_for_change(widget, value):
    """Return a Future that resolves on the next change of ``widget``'s trait ``value``.

    A one-shot observer is registered on the widget. When it fires, the
    future's result is set to the new trait value and the observer removes
    itself.
    """
    pending = asyncio.Future()

    def _on_change(change):
        # Hand the new value to whoever awaits the future, then detach.
        pending.set_result(change.new)
        widget.unobserve(_on_change, value)

    widget.observe(_on_change, value)
    return pending

In [None]:
async def fun(widget, out):
    """Collect five successive changes of ``widget``'s ``value`` trait into ``out``.

    After each change the current widget value is printed into the ``output``
    widget and ``click_value`` is reset to -1 so that the next click registers
    as a change. Must be a coroutine because it awaits ``wait_for_change``.

    Returns:
        The same ``out`` list, extended with the five new values.
    """
    for _ in range(5):
        out.append(await wait_for_change(widget, 'value'))
        with output:
            print('here', widget.value)
        click_value.value = -1
    return out

In [None]:
async def outside():
    """Run ``fun`` on ``click_value`` with a fresh list and return that list.

    Declared as a coroutine because it awaits ``fun``.
    """
    out = []
    await fun(click_value, out)
    return out

In [None]:
x = asyncio.ensure_future(outside())

In [None]:
x

In [None]:
x = asyncio.ensure_future(fun(click_value, out))
x

In [None]:
x

In [None]:
slider = ws.IntSlider()
out = ws.Output()

async def f():
    """Log ten rounds of "work" to ``out``, pausing for a slider change after each round."""
    for step in range(10):
        out.append_stdout(f'did work {step}\n')
        new_value = await wait_for_change(slider, 'value')
        out.append_stdout(f'async function continued with value {new_value}\n')
asyncio.ensure_future(f())

slider

In [None]:
out

In [None]:
out = []

In [None]:
x = asyncio.ensure_future(fun(click_value, out))

In [None]:
x, out

In [None]:
click_value.value = -1
loop = asyncio.get_event_loop()
# fun takes (widget, out); the loop must not be passed as an argument.
x = loop.create_task(fun(click_value, out))
x

In [None]:
async def wrapping_fun():
    """Start ``fun`` on ``click_value`` as a task and poll until it finishes.

    Resets ``click_value`` to -1 first, then prints 'here' and sleeps ten
    seconds for as long as the task is not done.
    """
    click_value.value = -1
    loop = asyncio.get_event_loop()
    # fun takes (widget, out); the loop must not be passed as an argument.
    x = loop.create_task(fun(click_value, out))
    while not x.done():
        print('here')
        await asyncio.sleep(10)
# y = asyncio.ensure_future(fun(button2, out))

In [None]:
await wrapping_fun()

In [None]:
x, out

In [None]:
x.result()

In [None]:
# await asyncio.wait({fun(button1, out),fun(button2, out)}, return_when=asyncio.FIRST_COMPLETED)
await asyncio.wait({x, y}, return_when=asyncio.FIRST_COMPLETED)

In [None]:
x

In [None]:
# ensure_future already wraps the coroutine in a Task; create_task only
# accepts a coroutine and rejects a Task with TypeError.
x = asyncio.ensure_future(fun(button1, out))
y = asyncio.ensure_future(fun(button2, out))

In [None]:
out[0].owner.i

In [None]:
out, button1.value, button2.value

In [None]:
button = ws.ToggleButton(description='click')
button

In [None]:
def clicked(change):
    """Observer that returns the change notification it receives, unchanged."""
    return change
y = button.observe(clicked, 'value')

In [None]:
y

In [None]:
# Rebuild the size x size grid of toggle buttons, one per board cell.
size = 3
buttons = [ws.ToggleButton(value=False, description='', 
                     layout=ws.Layout(width='34px', height='34px'),
                     sync=True) 
           for i in range(size**2)]
# Lay the flat button list out row by row: one HBox per board row.
hboxes = []
for i in range(size):
    hbox = []
    for j in range(size):
        hbox.append(buttons[size*i+j])
        # Tag each button with its flat board index.
        buttons[size*i+j].i = size*i+j
        # buttons[size*i+j].observe(partial(on_button_clicked, size*i+j), names='value')
        
    hboxes.append(ws.HBox(hbox))

vbox = ws.VBox(hboxes)
# Output widget used as a print target (via `with output:`).
output = ws.Output()

In [None]:
vbox

In [None]:
%autoawait

In [None]:
from random import randint
async def foo(n, s):
    """Sleep for ``s`` seconds, then return ``n``.

    Declared as a coroutine because it awaits ``asyncio.sleep``. The value is
    returned directly: the task created by ``ensure_future`` is already a
    future for it, so wrapping it in another Future is unnecessary.
    """
    await asyncio.sleep(s)
    return n

task = asyncio.ensure_future(foo(24, 5))
task1 = asyncio.ensure_future(foo(42, 1))
task, task1

In [None]:
await asyncio.wait({task, task1}, return_when=asyncio.FIRST_COMPLETED)

In [None]:
display(vbox, clicked_value)

In [None]:
vbox.keys

In [None]:
async def f():
    """Minimal coroutine for experimenting with tasks; resolves to 42."""
    answer = 42
    return answer

In [None]:
x = asyncio.ensure_future(f())

In [None]:
x

In [None]:
loop = asyncio.get_event_loop()

## Play widget yield

In [None]:
import time
import ipywidgets as ws
from IPython.display import display
from traitlets import Unicode
import asyncio

In [None]:
button1 = ws.ToggleButton(description='click')
button1.i = 1
button2 = ws.ToggleButton(description='click')
button2.i = 2
output = ws.Output()
click_value = ws.IntText(value=-1, disabled=True)

def button_change(change):
    """Observer: copy the ``i`` tag of the button that changed into ``click_value``."""
    source = change['owner']
    click_value.value = source.i

button1.observe(button_change, 'value')
button2.observe(button_change, 'value')



In [None]:
display(button1, button2, click_value, output)

In [None]:
from functools import wraps
def yield_for_change(widget, attribute):
    """Decorate a generator function so each ``yield`` waits for a widget change.

    Calling the decorated function creates the generator, registers an
    observer on ``widget`` for ``attribute`` and advances the generator to its
    first ``yield``. Every later change sends the new attribute value into the
    generator as the value of that ``yield``. When the generator finishes,
    'stop iteration' is printed into ``output`` and the observer is removed.
    The decorated function returns the generator object.
    """
    def decorator(generator_function):
        @wraps(generator_function)
        def start(*args):
            gen = generator_function(*args)

            def advance(change):
                try:
                    gen.send(change.new)
                except StopIteration:
                    with output:
                        print('stop iteration')
                    widget.unobserve(advance, attribute)

            widget.observe(advance, attribute)
            # Prime the generator so it is parked on its first yield.
            next(gen)
            return gen
        return start
    return decorator

In [None]:
@yield_for_change(button1, 'value')
def f(out):
    """Wait for one change of ``button1``; record it in ``out`` if it is >= 0 and print it into ``output``."""
    new_value = yield
    keep = new_value >= 0
    if keep:
        out.append(new_value)
    with output:
        print(new_value)
    

In [None]:
out = []



In [None]:
click_value.value = -1
g = f(out)

In [None]:
async def wait_for_out():
    """Poll the module-level ``out`` every half second and return once it is non-empty."""
    while True:
        if out:
            return
        await asyncio.sleep(.5)

In [None]:
await wait_for_out()

In [None]:
out

In [None]:
# Detach every observer from button1. unobserve() expects a specific handler;
# passing the builtin `all` does not mean "all handlers".
button1.unobserve_all()

In [None]:
g.send(-1)

In [None]:
g.send(-1)