# Allgemein

In [53]:
import random
random.seed(1)

import IPython.display 
import ipycanvas as cnv

In [54]:
def play_game(canvas):
    """Run an interactive game in which the computer moves first.

    The computer and the human alternate moves, starting from gStart.
    After every move the board is redrawn on `canvas`; as soon as a
    finished state is reached the result is announced via final_msg.
    """
    state = gStart
    while True:
        # computer's turn
        value, state = best_move(state)
        draw(state, canvas, f'For me, the game has the value {value}.')
        if finished(state):
            break
        IPython.display.clear_output(wait=True)
        # human's turn
        state = get_move(state)
        draw(state, canvas, '')
        if finished(state):
            IPython.display.clear_output(wait=True)
            break
    final_msg(state)

In [55]:
def to_list(State):
    """Return a mutable copy of State: a list holding one list per row."""
    return list(map(list, State))

def to_tuple(State):
    """Return an immutable (hashable) copy of State: a tuple of row tuples."""
    return tuple(map(tuple, State))

# Memoisierung

In [56]:
# Shared cache used by memoize; keys are pairs (function, argument tuple).
gCache = {}

def memoize(f):
    """Return a caching wrapper around `f`.

    Results are stored in the module-level dictionary gCache under the key
    (f, args), so all arguments must be hashable.  The dictionary is looked
    up by name on every call, hence rebinding gCache to a fresh dictionary
    empties the cache.  Only positional arguments are supported.

    The wrapper keeps the name and docstring of `f`.
    """
    import functools  # local import keeps this cell self-contained

    @functools.wraps(f)
    def f_memoized(*args):
        key = (f, args)
        try:
            return gCache[key]
        except KeyError:
            pass  # not cached yet; compute outside the try block
        result = f(*args)
        gCache[key] = result
        return result

    return f_memoized

# Mini-Max-Algorithmus

In [57]:
def best_move(State):
    """Pick a move for the first player (gPlayers[0]) using plain minimax.

    Returns a pair (value, successor): `value` is maxValue(State) and
    `successor` is chosen at random among those successor states whose
    minValue equals that value.
    """
    successors = next_states(State, gPlayers[0])
    bestVal = maxValue(State)
    candidates = []
    for successor in successors:
        if minValue(successor) == bestVal:
            candidates.append(successor)
    return bestVal, random.choice(candidates)

In [58]:
def maxValue(State):
    """Return the game value of State when gPlayers[0] is to move.

    A finished state is worth its utility; otherwise the value is the
    largest minValue over all successor states.
    """
    if not finished(State):
        return max(minValue(ns) for ns in next_states(State, gPlayers[0]))
    return utility(State)

In [59]:
def minValue(State):
    """Return the game value of State when gPlayers[1] is to move.

    A finished state is worth its utility; otherwise the value is the
    smallest maxValue over all successor states.
    """
    if not finished(State):
        return min(maxValue(ns) for ns in next_states(State, gPlayers[1]))
    return utility(State)

### Beispiel Tic-Tac-Toe

In [60]:
# The two marks; gPlayers[0] ('X') is the player the search maximizes for.
gPlayers = ['X', 'O']
# The empty 3x3 board: a tuple of three rows of three blanks.
gStart = ((' ',) * 3,) * 3
# Every winning line as a list of (row, col) pairs:
# three rows, three columns, then the two diagonals.
gAllLines = [
    *([(r, c) for c in range(3)] for r in range(3)),
    *([(r, c) for r in range(3)] for c in range(3)),
    [(i, i) for i in range(3)],
    [(i, 2 - i) for i in range(3)],
]

def next_states(State, player):
    """Return all states reachable from State by one move of `player`.

    For every blank cell (scanned row by row) a copy of the board with
    `player` placed in that cell is produced; the result is a list of
    tuple-of-tuples boards in scan order.
    """
    blanks = [(r, c) for r in range(3) for c in range(3) if State[r][c] == ' ']
    successors = []
    for r, c in blanks:
        board = to_list(State)
        board[r][c] = player
        successors.append(to_tuple(board))
    return successors

def utility(State):
    """Evaluate a tic-tac-toe board given as a tuple of row tuples.

    Returns 1 if some line of gAllLines is filled with 'X', -1 if it is
    filled with any other non-blank mark, 0 if the board is full without
    such a line, and None while the game is still undecided.
    """
    for Line in gAllLines:
        marks = {State[r][c] for r, c in Line}
        if len(marks) != 1 or marks == {' '}:
            continue  # mixed or empty line: nobody has won here
        return 1 if marks == {'X'} else -1
    if any(State[r][c] == ' ' for r in range(3) for c in range(3)):
        return None  # the board is not filled
    # the board is full and no line is complete, hence it's a draw
    return 0

def finished(State):
    """Return True if the game is over in State, i.e. utility(State) is not None."""
    # Identity comparison is the idiomatic None test and cannot be fooled
    # by a value with a custom __eq__; note that a utility of 0 (draw)
    # still counts as finished.
    return utility(State) is not None

### Spiel

In [61]:
# Edge length of one board cell in pixels.
g_size = 150

def create_canvas():
    """Create the canvas for a 3x3 board, display it and return it.

    The canvas is 3 * g_size pixels wide and 50 pixels taller than that;
    draw() writes its status text into the extra strip.
    """
    n = 3
    canvas = cnv.Canvas(size=(g_size * n, g_size * n + 50))
    # Use the explicitly imported function instead of the bare `display`
    # name, which only exists when IPython injects it into the namespace.
    IPython.display.display(canvas)
    return canvas

def get_move(State):
    """Ask the human player for a move and return the resulting state.

    The move is read from standard input as 'row,col'.  The prompt is
    repeated until the input denotes a blank cell of the board; that cell
    is then marked with 'O' and the new board is returned as a tuple of
    tuples.

    Only malformed input is handled here: KeyboardInterrupt and EOFError
    propagate, so the loop can be interrupted.  Negative indices are
    rejected instead of silently wrapping around to the other side of
    the board.
    """
    board = to_list(State)
    while True:
        try:
            row, col = (int(field) for field in input('Enter move here: ').split(','))
        except ValueError:
            # wrong number of comma-separated fields, or a field is not an integer
            row = col = None
        if row is None or not (0 <= row < len(board) and 0 <= col < len(board[row])):
            print('Illegal input.')
            print('row and col are numbers from the set {0,1,2}.')
            continue
        if board[row][col] == ' ':
            board[row][col] = 'O'
            return to_tuple(board)
        print("Don't cheat! Please try again.")

def final_msg(State):
    """Print the result of the game if State is finished.

    Returns True when a message was printed and False when the game is
    still running.  A utility of -1 is reported as a win for the human,
    1 as a win for the computer, anything else as a draw.
    """
    if not finished(State):
        return False
    outcome = utility(State)
    if outcome == -1:
        message = 'You have won!'
    elif outcome == 1:
        message = 'The computer has won!'
    else:
        message = "It's a draw."
    print(message)
    return True

def draw(State, canvas, value):
    """Render the board State on `canvas` and show `value` as status text.

    Every cell is drawn as a square of edge length g_size; 'X' is printed
    in red, any other non-blank mark in blue.  Each cell additionally gets
    a small green '(row, col)' label, and str(value) is written in black
    below the board.
    """
    canvas.clear()
    size = len(State)
    cells = [(r, c) for r in range(size) for c in range(size)]
    half = g_size // 2
    # grid and marks
    canvas.font = '90px sans-serif'
    canvas.text_align    = 'center'
    canvas.text_baseline = 'middle'
    for r, c in cells:
        left, top = c * g_size, r * g_size
        canvas.line_width = 3.0
        canvas.stroke_rect(left, top, g_size, g_size)
        mark = State[r][c]
        if mark == ' ':
            continue
        canvas.fill_style = 'red' if mark == 'X' else 'blue'
        canvas.fill_text(mark, left + half, top + half)
    # coordinate labels near the lower left corner of each cell
    canvas.font = '12px sans-serif'
    canvas.fill_style = 'green'
    for r, c in cells:
        canvas.fill_text(f'({r}, {c})', c * g_size + 16, r * g_size + 141)
    # status line below the board
    canvas.font = '20px sans-serif'
    canvas.fill_style = 'black'
    canvas.fill_text(str(value), 1.5 * g_size, 3.2 * g_size)

def play_game(canvas):
    """Play one game against the human; the computer makes the first move.

    Starting from gStart, the computer's move (best_move) and the human's
    move (get_move) alternate.  The board is redrawn on `canvas` after each
    move, and final_msg reports the outcome once a state is finished.
    """
    board = gStart
    game_over = False
    while not game_over:
        # the computer moves
        estimate, board = best_move(board)
        draw(board, canvas, f'For me, the game has the value {estimate}.')
        if finished(board):
            game_over = True
            continue
        IPython.display.clear_output(wait=True)
        # the human moves
        board = get_move(board)
        draw(board, canvas, '')
        if finished(board):
            IPython.display.clear_output(wait=True)
            game_over = True
    final_msg(board)

In [62]:
# canvas = create_canvas()
# val = maxValue(gStart)
# draw(gStart, canvas, f'Current value of game for "X": {val}')
# play_game(canvas)

### Ohne vs. mit Memoisierung

In [63]:
%%time
# Baseline: value of the start state computed by plain minimax, without any caching.
val = maxValue(gStart)
val

CPU times: total: 5.62 s
Wall time: 5.63 s


0

In [64]:
# Keep references to the plain functions so they can be restored later.
oldMaxValue = maxValue
oldMinValue = minValue

# Rebind the global names to caching wrappers.  maxValue and minValue call
# each other through these global names, so the recursive calls go through
# the wrappers as well.
maxValue = memoize(maxValue)
minValue = memoize(minValue)

In [65]:
%%time
# Same computation as before, now with maxValue/minValue wrapped by memoize.
val = maxValue(gStart)
val

CPU times: total: 125 ms
Wall time: 126 ms


0

In [66]:
# Restore the plain, uncached functions saved in oldMaxValue/oldMinValue.
maxValue = oldMaxValue
minValue = oldMinValue

### Ohne vs. mit Bitboard

In [67]:
%%time
# Plain minimax on the tuple-based board, measured again as the reference
# for the bitboard representation.
val = maxValue(gStart)
val

CPU times: total: 5.81 s
Wall time: 5.8 s


0

In [68]:
# Bitboard representation: the players are numbered 0 and 1.  A state is a
# single integer in which player p owns the bits p*9 .. p*9+8, one per cell.
gPlayers = [0, 1]
# The empty board: no bit is set.
gStart = 0

def set_bits(Bits):
    """Return the integer that has exactly the bit positions in Bits set."""
    # Distinct powers of two never overlap, so adding them is the same as
    # OR-ing them; the set() removes repeated positions first.
    return sum(set_bit(position) for position in set(Bits))

def set_bit(n):
    """Return the integer whose only set bit is bit number n (i.e. 2**n)."""
    return 1 << n

def empty(state):
    """Return the set of cell numbers (0-8) that are free in `state`.

    A cell n is taken if bit n (first player) or bit 9 + n (second player)
    is set in `state`.
    """
    # Fold the second player's bits 9-17 onto positions 0-8.
    taken = state | (state >> 9)
    free = set(range(9))
    free.difference_update(n for n in range(9) if taken >> n & 1)
    return free

def next_states(state, player):
    """Return the list of states reachable by one move of `player`.

    `player` is 0 or 1; marking cell n for that player sets bit
    player * 9 + n.  One successor is produced per free cell of `state`.
    """
    offset = player * 9
    return [state | set_bit(offset + n) for n in empty(state)]

# Bit masks of the eight winning lines, expressed in cell numbers 0-8.
gAllLines = [
    set_bits(cells)
    for cells in (
        (0, 1, 2),  # 1st row
        (3, 4, 5),  # 2nd row
        (6, 7, 8),  # 3rd row
        (0, 3, 6),  # 1st column
        (1, 4, 7),  # 2nd column
        (2, 5, 8),  # 3rd column
        (0, 4, 8),  # falling diagonal
        (2, 4, 6),  # rising diagonal
    )
]

def utility(state):
    """Evaluate a bitboard state.

    Returns 1 if the first player (bits 0-8) has completed a line of
    gAllLines, -1 if the second player (bits 9-17) has, 0 if the board is
    full without a completed line, and None if the game is not decided yet.
    """
    second = state >> 9  # the second player's marks moved down to bits 0-8
    for mask in gAllLines:
        if state & mask == mask:
            return 1   # the computer has won
        if second & mask == mask:
            return -1  # the computer has lost
    # 511 == 2**9 - 1 == 0b1_1111_1111.  `state & 511` keeps the first
    # player's marks; OR-ing both players' marks gives 511 exactly when
    # every cell is occupied.
    if (state & 511) | second == 511:
        return 0  # full board and no winner: it's a draw
    return None   # the board is not yet filled

In [69]:
%%time
# Plain minimax again, now running on the bitboard versions of gStart,
# next_states and utility.
val = maxValue(gStart)
val

CPU times: total: 2.95 s
Wall time: 2.95 s


0

### Alpha-Beta-Pruning

In [70]:
def alphaBetaMax(State, alpha, beta):
    """Alpha-beta search for a state in which gPlayers[0] (maximizer) moves.

    `alpha` and `beta` bound the window of values that are of interest.
    A finished state returns its utility.  Otherwise the successors are
    searched with alphaBetaMin; the search stops early as soon as one of
    them reaches `beta`, and else the raised lower bound is returned.
    """
    if finished(State):
        return utility(State)
    lower = alpha
    for successor in next_states(State, gPlayers[0]):
        score = alphaBetaMin(successor, lower, beta)
        if score >= beta:
            # Cut-off: the minimizing player can already hold the game to
            # beta elsewhere and will therefore never allow this state, so
            # it does not matter whether an even larger value exists.
            return score
        if score > lower:
            lower = score
    return lower

def alphaBetaMin(State, alpha, beta):
    """Alpha-beta search for a state in which gPlayers[1] (minimizer) moves.

    `alpha` and `beta` bound the window of values that are of interest.
    A finished state returns its utility.  Otherwise the successors are
    searched with alphaBetaMax; the search stops early as soon as one of
    them drops to `alpha`, and else the lowered upper bound is returned.
    """
    if finished(State):
        return utility(State)
    upper = beta
    for successor in next_states(State, gPlayers[1]):
        score = alphaBetaMax(successor, alpha, upper)
        if score <= alpha:
            # Cut-off: the maximizing player already has an alternative
            # worth at least alpha and will never allow this state.
            return score
        if score < upper:
            upper = score
    return upper

In [71]:
def best_move(State):
    """Pick a move for gPlayers[0] using alpha-beta pruning.

    Returns a pair (value, successor) where `value` is the alpha-beta value
    of State over the window [-1, 1] and `successor` is drawn at random
    from the successors whose alphaBetaMin value equals it.  The value and
    the chosen move are also printed.
    """
    successors = next_states(State, gPlayers[0])
    bestVal = alphaBetaMax(State, -1, 1)
    print(f'The state {State} has value {bestVal}.')
    candidates = []
    for successor in successors:
        if alphaBetaMin(successor, -1, 1) == bestVal:
            candidates.append(successor)
    bestState = random.choice(candidates)
    print(f'My move is: {bestState}')
    return bestVal, bestState

In [72]:
%%time
# Value of the start state via alpha-beta pruning over the full window [-1, 1].
val = alphaBetaMax(gStart, -1, 1)
val

CPU times: total: 125 ms
Wall time: 105 ms


0

### Alpha-Beta Pruning with Intelligent Memoization

In [73]:
# Cache used by evaluate/store_cache: maps a state to a pair (flag, value)
# with flag being '=', '≤' or '≥'.  Rebinding the name also discards
# whatever memoize stored in the previous dictionary.
gCache = {}

def maxValue(State, alpha, beta):
    """Alpha-beta value of State with gPlayers[0] to move, using the cache.

    Works like a pruned maximizer step, except that every successor is
    valued through evaluate(..., minValue, ...), which consults and
    updates gCache.
    """
    if finished(State):
        return utility(State)
    lower = alpha
    for successor in next_states(State, gPlayers[0]):
        score = evaluate(successor, minValue, lower, beta)
        if score >= beta:
            return score  # cut-off: the window is exceeded
        if score > lower:
            lower = score
    return lower

def minValue(State, alpha, beta):
    """Alpha-beta value of State with gPlayers[1] to move, using the cache.

    Works like a pruned minimizer step, except that every successor is
    valued through evaluate(..., maxValue, ...), which consults and
    updates gCache.
    """
    if finished(State):
        return utility(State)
    upper = beta
    for successor in next_states(State, gPlayers[1]):
        score = evaluate(successor, maxValue, alpha, upper)
        if score <= alpha:
            return score  # cut-off: the value falls below the window
        if score < upper:
            upper = score
    return upper

def evaluate(State, f, alpha=-1, beta=1):
    """Return f(State, alpha, beta), reusing and refining entries of gCache.

    A cached entry (flag, v) for State is used as follows:
      '=' : v is returned directly;
      '≤' : v is returned if v <= alpha, otherwise beta is lowered to v;
      '≥' : v is returned if beta <= v, otherwise alpha is raised to v.
    If the entry does not settle the call, `f` is invoked with the possibly
    narrowed window and its result is recorded via store_cache.
    """
    if State in gCache:
        flag, cached = gCache[State]
        if flag == '=':
            return cached
        elif flag == '≤':
            if cached <= alpha:
                return cached
            beta = min(beta, cached)
        elif flag == '≥':
            if beta <= cached:
                return cached
            alpha = max(alpha, cached)
    result = f(State, alpha, beta)
    store_cache(State, alpha, beta, result)
    return result

def store_cache(State, alpha, beta, v):
    """Record `v` for State in gCache together with a flag.

    The flag tells how `v` relates to the window it was computed with:
    '≤' if v <= alpha, '=' if alpha < v < beta, and '≥' if beta <= v.
    """
    if v <= alpha:
        flag = '≤'
    elif v < beta:   # alpha < v < beta
        flag = '='
    else:            # beta <= v
        flag = '≥'
    gCache[State] = (flag, v)

In [74]:
def best_move(State):
    """Pick a move for gPlayers[0] using alpha-beta pruning with the cache.

    Returns a pair (value, successor): `value` is evaluate(State, maxValue)
    over the window [-1, 1] and `successor` is drawn at random from the
    successors whose evaluate(..., minValue) result equals it.
    """
    successors = next_states(State, gPlayers[0])
    bestValue = evaluate(State, maxValue, -1, 1)
    candidates = []
    for successor in successors:
        if evaluate(successor, minValue, -1, 1) == bestValue:
            candidates.append(successor)
    return bestValue, random.choice(candidates)

In [75]:
%%time
# Value of the start state via alpha-beta pruning combined with the gCache lookups in evaluate.
v = evaluate(gStart, maxValue, -1, 1)
v

CPU times: total: 31.2 ms
Wall time: 25 ms


0