In [7]:
import chess
import chess.pgn
import requests
import io
from tqdm import tqdm

In [8]:
def fen_to_detailed_description(fen):
    board = chess.Board(fen)

    description = []

    # 1. Whose turn
    turn = "White" if board.turn else "Black"
    description.append(f"{turn} to move.")

    # 2. Castling rights
    castling_desc = []
    if board.has_kingside_castling_rights(chess.WHITE):
        castling_desc.append("White can castle kingside")
    if board.has_queenside_castling_rights(chess.WHITE):
        castling_desc.append("White can castle queenside")
    if board.has_kingside_castling_rights(chess.BLACK):
        castling_desc.append("Black can castle kingside")
    if board.has_queenside_castling_rights(chess.BLACK):
        castling_desc.append("Black can castle queenside")
    if castling_desc:
        description.append(". ".join(castling_desc) + ".")
    else:
        description.append("No castling rights remaining.")

    # 3. En passant
    if board.ep_square:
        description.append(f"En passant capture available at {chess.square_name(board.ep_square)}.")

    # 4. Piece locations
    piece_names = {
        chess.PAWN: "pawn",
        chess.KNIGHT: "knight",
        chess.BISHOP: "bishop",
        chess.ROOK: "rook",
        chess.QUEEN: "queen",
        chess.KING: "king",
    }

    def describe_pieces(color):
        color_name = "White" if color == chess.WHITE else "Black"
        parts = []
        for piece_type in piece_names:
            squares = list(board.pieces(piece_type, color))
            if squares:
                locs = [chess.square_name(sq) for sq in squares]
                piece_desc = f"{len(locs)} {piece_names[piece_type]}{'s' if len(locs) > 1 else ''} on {', '.join(locs)}"
                parts.append(piece_desc)
        return f"{color_name} has: " + "; ".join(parts) + "."

    description.append(describe_pieces(chess.WHITE))
    description.append(describe_pieces(chess.BLACK))

    return " ".join(description)

In [9]:
usernames = ["MagnusCarlsen"]

all_games = []
for username in usernames:
    url_archives = f'https://api.chess.com/pub/player/{username}/games/archives'
    headers = {'User-Agent': 'My Python App'}
    archives = requests.get(url_archives, headers=headers).json()['archives']
    
    # Iterate through each archive URL
    for archive_url in tqdm(archives):
        url = f'{archive_url}/pgn'
        response = requests.get(url, headers=headers)
        
        # Check if the request was successful
        if response.status_code == 200:
            pgn_io = io.StringIO(response.text)
            
            # Read all games from this archive
            while True:
                game = chess.pgn.read_game(pgn_io)
                if game is None:
                    break
                all_games.append(game)
            
        else:
            print(f"Failed to fetch {archive_url}, status code: {response.status_code}")

100%|██████████| 49/49 [00:34<00:00,  1.44it/s]


In [12]:
from langchain.schema import Document

documents = []

for game in tqdm(all_games[:500], desc = "Parsing games"):
    board = chess.Board()

    for move in game.mainline_moves():
        # fen = re.sub(r" \d+ \d+$", "", board.fen())
        try:
            metadata = {
                "date" : game.headers.get("Date"),
                "white" : game.headers.get("White"),
                "whiteElo": game.headers.get("WhiteElo"),
                "black" : game.headers.get("Black"),
                "blackElo": game.headers.get("BlackElo"),
                "result" : game.headers.get("Result"),
                "termination": game.headers.get("Termination"),
                "url" : game.headers.get("Link"),
                'fen': fen_to_detailed_description(board.fen()),
                'move': board.san(move)
            }
            
            document = Document(metadata = metadata, page_content = board.fen())
            documents.append(document)
            board.push(move)

        except:
            pass

Parsing games: 100%|██████████| 500/500 [00:14<00:00, 33.68it/s]


In [13]:
%run ../bedrock_setup.py

In [14]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS

model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

  embeddings_model = HuggingFaceBgeEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [17]:
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings_model)
vectorstore.save_local("../database/natural_language_test")

In [18]:
fens = [
    "r1bqkbnr/pppp1ppp/2n5/4p3/1bP5/5N2/PP1PPPPP/RNBQKB1R w KQkq - 2 4",
    "rnbq1rk1/pp3pbp/3p1np1/2pPp3/2P1P3/2N2N2/PP2BPPP/R1BQ1RK1 w - - 3 9",
    "r3k2r/pp1n1ppp/2pb1q2/3p4/3P4/2N1BN2/PPP2PPP/R2Q1RK1 w kq - 4 11",
    "r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/5N2/PPPP1PPP/RNBQK2R b KQkq - 2 4",
    "2rq1rk1/pp2bppp/3p1n2/3P4/2P1n3/2N2N2/PP1B1PPP/R2QR1K1 w - - 5 13",
    "r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 2 3"
]

moves = []
for fen in fens:
    n_fen = fen_to_detailed_description(fen)
    embed = embeddings_model.embed_query(n_fen)
    results = vectorstore.similarity_search_by_vector(embed, k=10)
    p_moves = []
    
    for doc in results:
        p_moves.append(doc.metadata['move'])
    moves.append(set(p_moves))

In [19]:
for i, fen in enumerate(fens):
    print(fen)
    p_moves = moves[i]
    for move in p_moves:
        board = chess.Board(fen)
        try:
            board.push_san(move)
            print(move)
        except:
            pass

r1bqkbnr/pppp1ppp/2n5/4p3/1bP5/5N2/PP1PPPPP/RNBQKB1R w KQkq - 2 4
rnbq1rk1/pp3pbp/3p1np1/2pPp3/2P1P3/2N2N2/PP2BPPP/R1BQ1RK1 w - - 3 9
Bd2
r3k2r/pp1n1ppp/2pb1q2/3p4/3P4/2N1BN2/PPP2PPP/R2Q1RK1 w kq - 4 11
Bd2
r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/5N2/PPPP1PPP/RNBQK2R b KQkq - 2 4
2rq1rk1/pp2bppp/3p1n2/3P4/2P1n3/2N2N2/PP1B1PPP/R2QR1K1 w - - 5 13
r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 2 3
