In [None]:
'''
TODO:
learn by adding layers of neurons (and translation/rotation functions?)
replay games backward & learn from them
'''

In [None]:
import chess
import chess.svg
import numpy as np
from IPython.display import clear_output
import random

In [None]:
def serialize(board):
    # https://github.com/geohot/twitchchess
    '''
    Encode a board as four stacked 8x8 bit planes.

    in: board
    out: [32 x 8] uint8 bitmap (4 planes of 8 rows, highest bit plane first)

    Every square gets a 4-bit code: 1-6 for the white P, N, B, R, Q, K and
    9-14 for the black ones. A corner rook that still has castling rights is
    coded 7 (white) or 15 (black); the en passant square is coded 8.
    '''
    assert board.is_valid()

    piece_codes = {"P": 1, "N": 2, "B": 3, "R": 4, "Q": 5, "K": 6,
                   "p": 9, "n": 10, "b": 11, "r": 12, "q": 13, "k": 14}
    squares = np.zeros(64, np.uint8)
    for sq in range(64):
        piece = board.piece_at(sq)
        if piece is None:
            continue
        squares[sq] = piece_codes[piece.symbol()]

    # (castling-rights query, colour, rook's corner square, plain rook code)
    castling = (
        (board.has_queenside_castling_rights, chess.WHITE, 0, 4),
        (board.has_kingside_castling_rights, chess.WHITE, 7, 4),
        (board.has_queenside_castling_rights, chess.BLACK, 56, 8 + 4),
        (board.has_kingside_castling_rights, chess.BLACK, 63, 8 + 4),
    )
    for has_rights, colour, corner, rook_code in castling:
        if has_rights(colour):
            assert squares[corner] == rook_code
            # 4 -> 7 and 12 -> 15: mark the rook as "may still castle"
            squares[corner] = rook_code + 3

    ep = board.ep_square
    if ep is not None:
        assert squares[ep] == 0
        squares[ep] = 8

    grid = squares.reshape(8, 8)
    planes = np.zeros((4, 8, 8), np.uint8)
    # plane 0 holds bit 3 of every code, plane 3 holds bit 0
    for plane, shift in enumerate((3, 2, 1, 0)):
        planes[plane] = (grid >> shift) & 1

    return planes.reshape(-1, 8)

def bitlist_to_int(e):
    '''
    Fold a sequence of bits, most significant first, into one integer.

    in: [bits]
    out: int (0 for an empty sequence)
    '''
    value = 0
    for bit in e:
        # make room for the next bit, then drop it into the lowest position
        value <<= 1
        value |= bit
    return value

def look2moves(board):
    '''
    Map each legal move to the legal replies in the position it leads to.

    in: board (left untouched; every move is tried on a copy)
    out: {legal_moves:<legal_moves in resulting position>}
    '''
    def replies_after(move):
        # play the move on a private copy so the caller's board is not modified
        position = board.copy()
        position.push(move)
        return position.legal_moves

    candidates = list(board.legal_moves)
    return {move: replies_after(move) for move in candidates}

def compress_evaluations(meval_dict):
    '''
    Collapse each move's list of reply evaluations to its largest value.

    in: {moves: [<int evaluations of resulting positions>]}
    out: {moves: <largest evaluation among the resulting positions, 0 when there are none>}
    '''
    return {move: max(scores, default=0) for move, scores in meval_dict.items()}

def objective_result(board):
    '''
    Report whether the game on `board` is over and, if so, how it ended.

    in: board
    out: <objective result>
        -1 means lost (the game ended with a winner, of either colour)
        0 means unknown (the game is still in progress)
        1 means draw
    '''
    outcome = board.outcome(claim_draw=True)
    if outcome is None:
        return 0
    # winner is True for white, False for black and None for a draw, so it must
    # be compared against None: a plain truth test reads a black win as a draw.
    return 1 if outcome.winner is None else -1

In [None]:
class Player:
    '''
    Self-playing chess agent built from layers of single-bit "neurons".

    Attributes:
        neurons: int array of shape (layers, 32, 8); each layer is NAND-ed with the serialized board
        gradients: random 0/1 mask shaped like `neurons`, redrawn after every update in learn()
        perceptions: {index: board} for every position act() has been asked about
        judgments: {index: the evaluation the player gave that position}
        knowledge: {index: objective value} for finished games (128 for a draw, 0 otherwise)
    '''

    def __init__(self):
        self.neurons = np.random.randint(0, 2, (1, 32, 8))
        self.gradients = np.random.randint(0, 2, self.neurons.shape)
        self.perceptions = {}
        self.judgments = {}
        self.knowledge = {}

    def learn(self):
        '''
        compute and apply gradients (difference between knowledge and judgments) with a random mask
        '''
        for ix, outcome in self.knowledge.items():
            x = serialize(self.perceptions[ix])
            dloss_dx = self.judgments[ix] ^ outcome
            dloss_dws = np.zeros_like(self.neurons)
            for i, layer in enumerate(self.neurons):
                # & binds tighter than ^ in Python; the parentheses only make that visible
                dloss_dws[i] = 1 ^ (x & dloss_dx)
                dloss_dx = 1 ^ (layer & dloss_dx)
            # flip only the neurons selected by both the random mask and the gradient
            self.neurons = self.neurons ^ (self.gradients & dloss_dws)
            self.gradients = np.random.randint(0, 2, self.neurons.shape)

    def think(self, s_board):
        '''
        in: 32x8 bit mapping of board
        out: 1x8 array of bits
        NAND board bits with neurons, then split and xor the halves together until a single row is left
        '''
        x = s_board
        for neuron_layer in self.neurons:
            x = 1 ^ np.logical_and(x, neuron_layer)
        while len(x) > 1:
            x1, x2 = np.split(x, 2)
            x = x1 ^ x2
        return x

    def _index_of(self, board):
        '''
        in: board
        out: index of the position in self.perceptions (a copy is stored the first time it is seen)
        '''
        for ix, seen in self.perceptions.items():
            if seen == board:
                return ix
        ix = len(self.perceptions)
        # keep a copy: the caller goes on pushing moves onto its own board
        self.perceptions[ix] = board.copy()
        return ix

    def act(self, board):
        '''
        Play the chosen move on `board`, or record the result if the game is over.

        in: board (a move is pushed onto it while the game is still running)
        out: True if a move was played, False if the game had already ended
        '''
        result = objective_result(board)
        if result:
            # game over: remember what we thought of the position and what it really was
            ix = self._index_of(board)
            self.judgments[ix] = self.think(serialize(board))
            self.knowledge[ix] = (result > 0) * 128
            return False

        evals = self.evaluate(board, look2moves(board))
        compressed_evals = compress_evaluations(evals)
        # pick the move whose highest-valued reply is lowest
        best_move = min(compressed_evals, key=compressed_evals.get)
        self.judgments[self._index_of(board)] = compressed_evals[best_move]
        board.push(best_move)
        return True

    def value(self, board):
        '''
        in: board
        out: <evaluation as an array of 8 bits>
        '''
        # think() has already folded its rows down to one, so just unwrap that row
        return self.think(serialize(board))[0]

    def value_move(self, board, move):
        '''
        in: board, move
        out: <int evaluation of resulting position>
        '''
        b = board.copy()
        b.push(move)
        return bitlist_to_int(self.value(b))

    def evaluate(self, board, m_dict):
        '''
        in: board, {legal_moves:[<legal moves in resulting position>]}
        out: {legal_moves:[<int evaluation of resulting positions>]}
        '''
        meval_dict = {}
        for m, lms in m_dict.items():
            b = board.copy()
            b.push(m)
            meval_dict[m] = [self.value_move(b, lm) for lm in lms]
        return meval_dict

    def play(self, n_games):
        '''
        play n games and learn from them (backpropagate)
        '''
        for _ in range(n_games):
            # every game needs its own board; a finished one would end the next game at once
            board = chess.Board()
            while self.act(board):
                pass
            self.learn()

    def play_vs(self):
        '''
        play against the ai!
        yields board position (svg) after each of the ai's moves, then reads the user's reply from input()
        '''
        board = chess.Board()
        while self.act(board):
            yield chess.svg.board(board, size=300)
            if next(iter(board.legal_moves), None) is None:
                # the ai's move left the user without a legal reply; let act() record the result
                continue
            while True:
                try:
                    user_move = chess.Move.from_uci(input())
                except ValueError:
                    # not parseable as a uci move
                    user_move = None
                if user_move is not None and user_move in board.legal_moves:
                    board.push(user_move)
                    break
                print('input a legal move in the form <square from><square to>\nex: g1f3')

In [None]:
# Create the player and let it play one game against itself, learning when the game ends.
# The name `p` is also used by the interactive cell that calls p.play_vs().
p = Player()
p.play(1)

In [None]:
# Play interactively against `p`: each value yielded by play_vs() is a board drawing,
# shown in place of the previous one.
for bd in p.play_vs():
    clear_output(wait=True)
    display(bd) 