In [19]:
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm

In [23]:
documents = []
types = ["beginners", "strategy", "tactics", "opening-theory", "middlegame", "endgames"]
for t in tqdm(types, desc = "Processing articles"):
    for page_number in range(1, 10):
        base_url = f"https://www.chess.com/articles/{t}?page={page_number}"
        response = requests.get(base_url)
        soup = BeautifulSoup(response.text, "html.parser")
        
        articles = soup.find_all("article")
        for article in articles:
            link = article.find("a")["href"]
            response = requests.get(link)
            soup = BeautifulSoup(response.text, "html.parser")
            article = soup.find_all("article")[0]
            title = article.find("div", class_="post-view-header").get_text(strip=True)
            content = article.find("div", class_="post-view-content").get_text(strip=True)
            document = {"type":t, "title":title, "content":content}
            documents.append(document)

100%|██████████| 6/6 [05:34<00:00, 55.75s/it]


In [1]:
%run ../bedrock_setup.py

In [2]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS

In [3]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

  embeddings_model = HuggingFaceBgeEmbeddings(
IOStream.flush timed out
  from .autonotebook import tqdm as notebook_tqdm


In [25]:
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

contents = []
for document in documents:
    metadata = {
        "type": document["type"],
        "title": document["title"]
    }
    doc = Document(metadata=metadata, page_content = document["content"])
    contents.append(doc)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(contents)

In [28]:
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings_model)
vectorstore.save_local("../database/knowledge_base")

In [29]:
retriever = vectorstore.as_retriever(search_kwargs={'k': 4})

In [31]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

llm = ChatBedrockConverse(
    client=bedrock_client,
    model_id="us.amazon.nova-micro-v1:0",
)

system_prompt = (
    "You are an assistant for question-answering tasks on chess. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Be as descriptive as possible while still being "
    "factual and coherent."
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [39]:
q = """
    What is the best move in this position: r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 2 3?
    Give your answer in SAN format.
    """

results = rag_chain.invoke(
    {"input": q}
)

In [41]:
results["context"]

[Document(id='12b5a5b9-9f88-4839-9a60-3809f749419c', metadata={'type': 'tactics', 'title': 'PogChamps 3 Chess Puzzles: Finals'}, page_content='information on PogChamps 3, readthis guide.Could you find the right moves in each of these positions? How difficult was it for you to find them? Leave a comment below to let us know!Other PogChamps 3 puzzles:PogChamps 3 Chess Puzzles: Day 1\u200ePogChamps 3 Chess Puzzles: Day 2PogChamps 3 Chess Puzzles: Day 3PogChamps 3 Chess Puzzles: Day 4PogChamps 3 Chess Puzzles: Day 5PogChamps 3 Chess Puzzles: Day 6PogChamps 3 Chess Puzzles: Day 7PogChamps 3 Chess Puzzles: Day 8PogChamps 3 Chess Puzzles: Day 9PogChamps 3 Chess Puzzles: Day 10PogChamps 3 Chess Puzzles: Days 11 And 12'),
 Document(id='f7f68fb7-6e62-4c2c-9496-679e700792d3', metadata={'type': 'opening-theory', 'title': 'Transpositions'}, page_content="let me explain the key ideas that one needs to understand to be able to recognize such transpositions. Study the pawn structures thoroughly and tr

In [40]:
results["answer"]

"To determine the best move in the given chess position, let's analyze the board:\n\n```\nr1bqkbnr\npppp1ppp\n2n5\n4p3\n4P3\n5N2\nPPPP1PPP\nRNBQKB1R\n```\n\nWhite to move and play:\n\n```\nr1bqkbnr\npppp1ppp\n2n5\n4p3\n4P3\n5N2\nPPPP1PPP\nRNBQKB1R\n```\n\nHere is a step-by-step analysis:\n\n1. **Identify the key features**:\n   - White has a passed pawn on `d5` that is supported by the knight on `c3`.\n   - Black's king is in the center, which can be a weakness.\n   - Black has a pawn on `c6` that can be attacked by the `d5` pawn.\n\n2. **Evaluate the pawn structure**:\n   - White's pawn on `d5` is a strong passed pawn.\n   - Black's pawn structure is somewhat passive, and the `c6` pawn is isolated.\n\n3. **Consider the immediate threats**:\n   - The `d5` pawn can advance to `d4` and potentially create a strong pawn chain or push further to `d3` to create a passed pawn storm.\n   - The knight on `c3` supports the `d5` pawn and can jump to `d5` if needed.\n\n4. **Plan**:\n   - The best 