In [1]:
%run ../bedrock_setup.py

In [2]:
import chess
import chess.engine
import sys
import os
import json
from tqdm import tqdm

sys.path.append(os.path.abspath('../'))
from recommendation_agent import create_recommendation_agent

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model

model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings_model = HuggingFaceEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

all_games = FAISS.load_local("../database/all_games", embeddings_model, allow_dangerous_deserialization=True)


model = init_chat_model("us.anthropic.claude-3-5-haiku-20241022-v1:0",
                      model_provider="bedrock_converse",
                      region_name="us-east-1",
                      client=bedrock_client)

engine = chess.engine.SimpleEngine.popen_uci("../Stockfish/src/stockfish")

recommendation_agent = create_recommendation_agent(model, embeddings_model, all_games, engine)

In [5]:
with open("../chess_fens_by_phase.json", "r") as f:
    fens = json.load(f)

In [30]:
from langchain_core.messages import HumanMessage
import re

def get_eval_score(board):
    info = engine.analyse(board, chess.engine.Limit(time=0.1))
    score = info["score"].white()  # always get score from white’s perspective
    if score.is_mate():
        return 100000 if score.mate() > 0 else -100000
    return score.score()

In [39]:
def compare_agent_and_engine(fens):
    agent_moves = []
    engine_moves = []
    agent_losses = []
    engine_losses = []
    
    for fen in fens:
        board = chess.Board(fen)
        
        query = f"""
        Given this chess position in FEN notation: {fen}
        Analyze the position and recommend the single best move.
        Return only the move in standard algebraic notation (e.g., "e4", "Nf3", "Bxc6") using the exact format below without any additional text:
        The recommended move is [move].
        """
    
        # Agent move
        response = recommendation_agent.invoke({"messages": [HumanMessage(content=query)]})
        agent_response = response["messages"][-1].content
        match = re.search(r"\b([KQRBN]?[a-h]?[1-8]?x?[a-h][1-8](=[QRBN])?[+#]?)\b", agent_response)
        try:
            agent_move = match.group(1)
        except:
            agent_move = "not found"
    
        # Engine best move
        engine_best = engine.play(board, chess.engine.Limit(time=0.1)).move
    
        # Evaluate original position
        orig_score = get_eval_score(board)
    
        # Evaluate agent move
        try:
            board.push_san(agent_move)
            agent_score = get_eval_score(board)
            board.pop()
            agent_loss = orig_score - agent_score
        except:
            if agent_move == "not_found":
                agent_loss = None
            else:
                agent_move = "illegal"
                agent_loss = None
            
        # Evaluate engine move
        board.push(engine_best)
        engine_score = get_eval_score(board)
        board.pop()
        engine_loss = orig_score - engine_score
    
        print(f"FEN: {fen}")
        if agent_loss is not None:
            print(f"Agent move:  {agent_move} ({agent_loss:+} cp)")
        else:
            print(f"Agent move: {agent_move} (N/A cp)")
        print(f"Engine move: {board.san(engine_best)} ({engine_loss:+} cp)")
        print("-" * 40)

        agent_moves.append(agent_move)
        engine_moves.append(board.san(engine_best))
        agent_losses.append(agent_loss)
        engine_losses.append(engine_loss)

    return [agent_moves, engine_moves, agent_losses, engine_losses]

In [40]:
early_game = compare_agent_and_engine(fens['early_game'])

FEN: rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
Agent move:  e4 (-7 cp)
Engine move: e4 (+0 cp)
----------------------------------------
FEN: rnbqkbnr/pppp1ppp/8/4p3/3P4/8/PPP2PPP/RNBQKBNR b KQkq - 0 2
Agent move:  Nf6 (-227 cp)
Engine move: exd4 (+18 cp)
----------------------------------------
FEN: r1bqkbnr/pppppppp/2n5/8/2B5/5N2/PPPPPPPP/RNBQK2R b KQkq - 2 3
Agent move:  d5 (-15 cp)
Engine move: d5 (-27 cp)
----------------------------------------
FEN: r1bqk2r/ppppbppp/2n2n2/4p3/2B1P3/2NP1N2/PPP2PPP/R1BQK2R w KQkq - 5 5
Agent move:  Bb5 (+31 cp)
Engine move: Bb5 (+28 cp)
----------------------------------------
FEN: rnbqkb1r/pppp1ppp/5n2/4p3/2B1P3/8/PPPP1PPP/RNBQK1NR w KQkq - 2 3
Agent move:  Nc3 (+10 cp)
Engine move: d3 (+23 cp)
----------------------------------------
FEN: r1bqkbnr/pp1p1ppp/2n1p3/2p5/4P3/2N2N2/PPPP1PPP/R1BQKB1R w KQkq - 0 5
Agent move:  d4 (+2 cp)
Engine move: d4 (-1 cp)
----------------------------------------
FEN: r1bqk2r/pp1nbppp/2n1p3/2ppP3/3P4/2

In [41]:
mid_game = compare_agent_and_engine(fens['mid_game'])

FEN: r1bq1rk1/pppp1ppp/2n2n2/2b1p3/2B1P3/2NP1N2/PPP2PPP/R1BQ1RK1 w - - 8 7
Agent move:  d4 (+103 cp)
Engine move: Nd5 (+4 cp)
----------------------------------------
FEN: r2q1rk1/pp2bppp/2n1pn2/2pp4/3P1B2/2P1PN2/PP1N1PPP/R2QKB1R w KQ - 6 8
Agent move:  Be2 (-1 cp)
Engine move: h4 (+24 cp)
----------------------------------------
FEN: 2rq1rk1/pp2bppp/2n1pn2/3p4/2pP4/2N1PN2/PPQ2PPP/R1B2RK1 b - - 9 10
Agent move:  Re8 (-23 cp)
Engine move: a6 (-2 cp)
----------------------------------------
FEN: 2kr1b1r/pppbqppp/2np1n2/8/2BPP3/2N2N2/PPP2PPP/R1BQ1RK1 w - - 6 8
Agent move:  d5 (+61 cp)
Engine move: h3 (-9 cp)
----------------------------------------
FEN: r1bq1rk1/pp1n1ppp/2pbpn2/3p4/3P4/2NBPN2/PP3PPP/R1BQ1RK1 w - - 7 7
Agent move:  Re1 (+41 cp)
Engine move: e4 (+0 cp)
----------------------------------------
FEN: r2q1rk1/1b2bppp/p1n1pn2/1pp5/3P4/2N1PN2/PPQ1BPPP/2KR1B1R w - - 11 10
Agent move:  Bd3 (+190 cp)
Engine move: dxc5 (-8 cp)
----------------------------------------
FEN: r4rk1/1ppq1

In [42]:
end_game = compare_agent_and_engine(fens['end_game'])

FEN: 8/5pk1/6p1/5p2/8/6P1/5P1K/8 w - - 40 30
Agent move:  Kh3 (+35 cp)
Engine move: Kg1 (+75 cp)
----------------------------------------
FEN: 8/5k2/8/2p5/2P5/8/5K2/8 w - - 44 35
Agent move:  Ke3 (+0 cp)
Engine move: Ke3 (+0 cp)
----------------------------------------
FEN: 6k1/5pp1/5n1p/8/8/5PPP/6K1/8 w - - 45 35
Agent move:  g4 (+50 cp)
Engine move: Kf2 (+16 cp)
----------------------------------------
FEN: 8/5pk1/4p1p1/8/1KP5/8/8/8 b - - 48 40
Agent move:  e5 (+15 cp)
Engine move: Kf6 (+24 cp)
----------------------------------------
FEN: 8/8/6k1/5p2/5P2/6K1/8/8 w - - 50 45
Agent move:  Kf3 (+0 cp)
Engine move: Kh3 (+0 cp)
----------------------------------------
FEN: 6k1/5ppp/8/8/8/5PPP/6K1/8 w - - 40 35
Agent move:  g4 (-1 cp)
Engine move: Kf2 (-6 cp)
----------------------------------------
FEN: 8/5kp1/6p1/8/4K3/8/8/8 b - - 60 50
Agent move:  g5 (-423 cp)
Engine move: Kf6 (+144 cp)
----------------------------------------
FEN: 8/8/8/6k1/5p2/6P1/5K2/8 w - - 70 60
Agent move:  Kg1 

In [45]:
import json

with open('../eval_early.json', 'w') as f:
    json.dump(early_game, f)

with open('../eval_mid.json', 'w') as f:
    json.dump(mid_game, f)

with open('../eval_end.json', 'w') as f:
    json.dump(end_game, f)