In [2]:
import requests
import io
import random
import chess.pgn
from tqdm import tqdm
from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [3]:
# Embedding model configuration. The three settings are kept as separate
# module-level names so they can be inspected or tweaked before the
# HuggingFaceEmbeddings wrapper is constructed.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device="cpu")  # run the encoder on the CPU
encode_kwargs = dict(normalize_embeddings=True)  # request normalized vectors from the encoder

embeddings_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Chess.com accounts whose games are downloaded and indexed, one vector
# store per account.
usernames = ['Hikaru', 'MagnusCarlsen', 'GukeshDommaraju', 'FabianoCaruana', 'GHANDEEVAM2003']

# HTTP headers attached to every request made to the chess.com API.
headers = {
    'User-Agent': 'My Python App',
}

# Upper bound on the number of positions indexed per player; larger pools are
# randomly down-sampled to this size.
MAX_DOCUMENTS_PER_PLAYER = 100000
# Seconds to wait on a single HTTP request before treating it as failed, so a
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT_SECONDS = 30


def _download_archive_games(archive_url):
    """Fetch the PGN export of one archive and parse it into games.

    Args:
        archive_url: URL of a single archive, as listed by the player's
            ``games/archives`` endpoint; ``/pgn`` is appended to it.

    Returns:
        A list of parsed ``chess.pgn.Game`` objects. The list is empty when
        the request fails or the server answers with a non-200 status; the
        failure is printed rather than raised so the remaining archives are
        still processed.
    """
    try:
        response = requests.get(
            f'{archive_url}/pgn', headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
        )
    except requests.RequestException as e:
        print(f"Failed to fetch {archive_url}: {e}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch {archive_url}, status code: {response.status_code}")
        return []

    pgn_io = io.StringIO(response.text)
    # read_game returns None once the stream is exhausted, which is used as
    # the sentinel that stops the iteration.
    return list(iter(lambda: chess.pgn.read_game(pgn_io), None))


def _iter_player_positions(game, username):
    """Yield one Document for every position in which ``username`` was to move.

    Args:
        game: A parsed ``chess.pgn.Game``.
        username: Account name to look for in the ``White``/``Black`` headers.
            The comparison ignores case.

    Yields:
        ``Document`` objects whose ``page_content`` is the FEN of the position
        before the player's move and whose metadata carries the game headers,
        the move in SAN and the same FEN. Nothing is yielded when the player
        is on neither side of the game.

    Raises:
        Whatever python-chess raises while setting up the board or rendering a
        move; documents yielded before the failure remain valid.
    """
    game_headers = game.headers
    wanted = username.lower()
    if (game_headers.get("White") or "").lower() == wanted:
        player_color = chess.WHITE
    elif (game_headers.get("Black") or "").lower() == wanted:
        player_color = chess.BLACK
    else:
        return

    # Header-derived fields are identical for every move of the game, so they
    # are looked up once and copied into each document's metadata.
    shared_metadata = {
        "date": game_headers.get("Date"),
        "white": game_headers.get("White"),
        "whiteElo": game_headers.get("WhiteElo"),
        "black": game_headers.get("Black"),
        "blackElo": game_headers.get("BlackElo"),
        "result": game_headers.get("Result"),
        "termination": game_headers.get("Termination"),
        "url": game_headers.get("Link"),
    }

    board = game.board()
    for move in game.mainline_moves():
        if board.turn == player_color:
            # SAN and FEN must both be taken before the move is pushed: they
            # describe the position the player was facing.
            fen = board.fen()
            metadata = {**shared_metadata, "move": board.san(move), "fen": fen}
            yield Document(metadata=metadata, page_content=fen)
        board.push(move)


# For each player: download all archived games, turn every position where the
# player was to move into a Document, sample at most MAX_DOCUMENTS_PER_PLAYER of
# them and persist a FAISS index under ../database/<username>.
for username in usernames:
    print(f"Processing player: {username}")

    try:
        url_archives = f'https://api.chess.com/pub/player/{username}/games/archives'
        archives = requests.get(
            url_archives, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
        ).json()['archives']
    except Exception as e:
        print(f"Error fetching archives for {username}: {e}")
        continue

    player_games = []
    for archive_url in tqdm(archives, desc=f"Getting PGNs for ({username})", leave=False):
        player_games.extend(_download_archive_games(archive_url))

    documents = []
    failed_games = 0
    for game in tqdm(player_games, desc=f"Parsing games for ({username})", leave=False):
        try:
            for doc in _iter_player_positions(game, username):
                documents.append(doc)
        except Exception:
            # Best effort: a game that cannot be replayed is skipped (keeping
            # any positions extracted before the failure), but it is counted
            # instead of being silently ignored. Catching Exception rather
            # than using a bare except lets Ctrl-C still interrupt the run.
            failed_games += 1

    if failed_games:
        print(f"Skipped or truncated {failed_games} unparsable games for {username}")

    if not documents:
        # NOTE(review): building a FAISS store from an empty document list is
        # expected to fail, so such players are skipped explicitly.
        print(f"No positions found for {username}; skipping vectorstore")
        continue

    sample_size = min(MAX_DOCUMENTS_PER_PLAYER, len(documents))
    sampled_documents = random.sample(documents, sample_size)
    print(len(sampled_documents))

    print(f"Building vectorstore for {username}")
    vectorstore = FAISS.from_documents(sampled_documents, embedding=embeddings_model)
    vectorstore.save_local(f"../database/{username}")

Processing player: Hikaru


                                                                                   

100000
Building vectorstore for Hikaru
Processing player: MagnusCarlsen


                                                                                       

100000
Building vectorstore for MagnusCarlsen
Processing player: GukeshDommaraju


                                                                                         

100000
Building vectorstore for GukeshDommaraju
Processing player: FabianoCaruana


                                                                                        

100000
Building vectorstore for FabianoCaruana
Processing player: GHANDEEVAM2003


                                                                                          

100000
Building vectorstore for GHANDEEVAM2003
