In [14]:
import chromadb
from chromadb.utils import embedding_functions
from langchain_community.document_loaders import PyPDFLoader 
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_ollama import OllamaLLM
from langchain import PromptTemplate


import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [15]:
file_path = "data/laws_of_chess.pdf"
def load_and_split_doc(file_path):
    loader=PyPDFLoader(file_path)
    doc = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap= 200,
    length_function = len,
    is_separator_regex=False
    )
    chunks = text_splitter.split_documents(doc)
    # print("# of docs: {0}".format(len(chunks)))
    # print("Sample metadata: {0}".format(chunks[0].metadata))
    # print("Sample content: {0}".format(chunks[0].page_content))
    return chunks



In [16]:
def get_ids(chunks):
    ids = []
    current_page = 0
    count = 1
    for chunk in chunks:
        page = chunk.metadata.get("page")
        source = chunk.metadata.get("source")
        if page > current_page:
            count = 1
            current_page = page
        ids.append(f"{source}:{current_page}:{count}")
        count += 1
    return ids

In [17]:
chunks = load_and_split_doc(file_path=file_path)
ids = get_ids(chunks)
# print(f"ids: {ids}")

In [18]:
for chunk in chunks:
    print(chunk.page_content+ "\n\n ----------------------------------\n\n")


1 
E.I.01 
 
FIDE LAWS of CHESS 
 
Contents: 
 
 PREFACE page 2 
 
 BASIC RULES OF PLAY 
 
Article 1: The nature and objectives of the game of chess page 2 
 
Article 2: The initial position of the pieces on the chessboard page 3 
 
Article 3: The moves of the pieces page 4 
 
Article 4: The act of moving the pieces page 7 
 
Article 5: The completion of the game page 8 
 
 COMPETITION RULES 
 
Article 6: The chess clock page 9 
 
Article 7: Irregularities page 11

 ----------------------------------


Article 4: The act of moving the pieces page 7 
 
Article 5: The completion of the game page 8 
 
 COMPETITION RULES 
 
Article 6: The chess clock page 9 
 
Article 7: Irregularities page 11 
 
Article 8: The recording of the moves page 11 
 
Article 9: The drawn game page 12 
 
Article 10: Quickplay finish page 13 
 
Article 11: Points page 14 
 
Article 12: The conduct of the players page 14 
 
Article 13: The role of the arbiter (see Preface) page 15 
 
Article 14: FIDE page 16

 ----

In [19]:
# initialise chromadb
# client = chromadb.Client()

# create in-memory collection
# collection = client.create_collection(name="chess_collection", embedding_function= embedding_function)

# initalise embedding function
def get_embedding_function():
    embedding_function = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
    return embedding_function
 
embedding_function = get_embedding_function()


vectordb = Chroma.from_documents(
    documents=chunks,
    ids= ids,
    embedding= embedding_function,
    persist_directory="chess_chroma_db"
    )

OllamaEmbeddings: 100%|██████████| 180/180 [00:05<00:00, 32.46it/s]


In [24]:
query = "What is the objective of the game?"

# query the db
results = vectordb.similarity_search_with_relevance_scores(
    query= query,
    k=4
)

context = "\n\n---------------------------------------------\n\n".join([doc[0].page_content for doc in results])

prompt_template = PromptTemplate(
    input_variables=["context","question"],
    template="Use the following context to answer the question.\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"
    )

prompt = prompt_template.format(context = context, question=query)

llm = OllamaLLM(model="llama3.2:latest")

response_text = llm.invoke(prompt)
print(context)
print(response_text)

OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 18.41it/s]
  results = vectordb.similarity_search_with_relevance_scores(


(See Article 6.7) 
 
1.2 The objective of each player is to place the opponent’s king ‘ under attack’ in such a way 
that the opponent has no legal move . The player who achieves this goal is said to have 
‘checkmated’ the opponent’s king and to have won the game. Leaving one’s own king 
under attack, exposing one’s own king to attack and also ’capturing’ the opponent’s king 
are not allowed. The opponent whose king has been checkmated has lost the game.

---------------------------------------------

BASIC RULES OF PLAY 
 
 
Article 1: The nature and objectives of the game of chess 
 
1.1 The game of chess is played between two opponents who move their pieces alternately 
on a square board called a ‘chessboard’. The player with the white pieces commences the 
game. A player is said to ‘ have the move ’, when his opponent’s move has been ‘ made’. 
(See Article 6.7) 
 
1.2 The objective of each player is to place the opponent’s king ‘ under attack’ in such a way

-----------------------

In [21]:
print(context)

F. Chess960 Rules 
 
F.1 Before a Chess960 game a starting position is randomly set up, subj ect to certain rules. After 
this, the game is played in the same way as standard chess. In particular, pieces and pawns 
have their normal moves, and each player's objective is to checkmate the opponent's king. 
 
F.2 Starting position requirements 
The starting position for Chess960 must meet certain rules. White pawns are placed on the

---------------------------------------------

a. do not conflict in any way with the official FIDE Laws of Chess, and 
b. are limited to the territory of the federation concerned, and 
c. are not valid for any FIDE match, championship or qualifying event, or for a FIDE title or 
rating tournament. 
 
 
 
 
BASIC RULES OF PLAY 
 
 
Article 1: The nature and objectives of the game of chess 
 
1.1 The game of chess is played between two opponents who move their pieces alternately

---------------------------------------------

BASIC RULES OF PLAY 
 
 
Article 1

In [None]:
from langchain.chains import RetrievalQA

llm = OllamaLLM(model="llama3.2:latest")

prompt = PromptTemplate(
    input_variables=["context","input"],
    template="Use the following context to answer the question.\n\nContext:\n{context}\n\nQuestion:\n{input}\n\nAnswer:"
    )

combine_chain = create_stuff_documents_chain(
    llm = llm,
    prompt=prompt,
)

qa_chain = create_retrieval_chain(
    retriever=vectordb.as_retriever(search_kwargs={"k":3}),
    combine_docs_chain=combine_chain,
)

# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=vectordb.as_retriever(search_kwargs={"k":3}),
#     chain_type="stuff",
#     chain_type_kwargs={"prompt": prompt},
#     verbose=True,
#     input_key="input"
# )

res = qa_chain.invoke({"input":query})

# res = qa_chain.invoke({"input":query})
print("Answer:", res["answer"])
print(len(res["context"]))
for doc in res["context"]:
    print(f"\n\n--------------------------  \n\nContext:, {doc.page_content}")


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.62it/s]


Answer: The objective of the game is to place the opponent's king "under attack" in such a way that the opponent has no legal move. This results in checkmating the opponent's king and winning the game.
3


-------------------------- 

Context:, (See Article 6.7) 
 
1.2 The objective of each player is to place the opponent’s king ‘ under attack’ in such a way 
that the opponent has no legal move . The player who achieves this goal is said to have 
‘checkmated’ the opponent’s king and to have won the game. Leaving one’s own king 
under attack, exposing one’s own king to attack and also ’capturing’ the opponent’s king 
are not allowed. The opponent whose king has been checkmated has lost the game.


-------------------------- 

Context:, BASIC RULES OF PLAY 
 
 
Article 1: The nature and objectives of the game of chess 
 
1.1 The game of chess is played between two opponents who move their pieces alternately 
on a square board called a ‘chessboard’. The player with the white pieces commen

In [34]:
# from langchain.chat_models import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

llm = OllamaLLM(model="llama3.2:latest")

prompt = PromptTemplate(
    input_variables=["context", "input"],
    template="""
Use the following context to answer the question.

Context:
{context}

Question:
{input}

Answer:
"""
)

combine_chain = create_stuff_documents_chain(llm, prompt)
qa_chain = create_retrieval_chain(
    retriever=vectordb.as_retriever(k=3),
    combine_docs_chain=combine_chain
)

res = qa_chain.invoke({"input": query})
print(res["answer"])


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 21.86it/s]


According to Article 1: The nature and objectives of the game of chess, the objective of each player is to place the opponent's king "under attack" in such a way that the opponent has no legal move. This results in checkmating the opponent's king and winning the game.


In [None]:
from langchain.chains.question_answering import load_qa_chain

# A) Set up your LLM
llm = OllamaLLM(model="llama3.2:latest")

# B) Choose a prompt template that matches load_qa_chain's expected keys
#    load_qa_chain uses "input_documents" for the docs and "question" for the query.
prompt = PromptTemplate(
    input_variables=["input_documents","input"],
    template=(
        "Use the following documents to answer the question.\n\n"
        "{input_documents}\n\n"
        "Question: {input}\n"
        "Answer:"
    ),
)

# C) Load a “stuff” chain (single-prompt) around your LLM
combine_chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    prompt=prompt
)

# 1) Retrieve your top-3 docs
relevant_docs = retriever.get_relevant_documents("How many pieces does each player start with?")

# 2) Call the combine chain directly
answer = combine_chain.run(
    input_documents=relevant_docs,
    question="How many pieces does each player start with?"
)

print("Answer:", answer)

ValidationError: 1 validation error for StuffDocumentsChain
  Value error, document_variable_name context was not found in llm_chain input_variables: ['input_documents', 'question'] [type=value_error, input_value={'llm_chain': LLMChain(ve...None, 'callbacks': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error

In [None]:
!ollama list

NAME                       ID              SIZE      MODIFIED     
nomic-embed-text:latest    0a109f422b47    274 MB    22 hours ago    
llama3.2:latest            a80c4f17acd5    2.0 GB    24 hours ago    


In [None]:
print(results)
print(len(results))

[(Document(metadata={'creationdate': '2009-07-10T14:50:44+04:00', 'moddate': '2009-07-10T14:51:13+04:00', 'title': 'E', 'producer': 'Adobe PDF Library 9.0', 'page_label': '18', 'source': 'data/laws_of_chess.pdf', 'keywords': '', 'subject': '', 'company': '', 'total_pages': 25, 'creator': 'Acrobat PDFMaker 9.1 for Word', 'sourcemodified': 'D:20090710105007', 'author': 'INTERUNFALL Versicherung AG', 'page': 17}, page_content='C.6 The eight ranks (from bottom to top for White and from top to bottom for Black) are \nnumbered 1, 2, 3, 4, 5, 6, 7, 8, respectively. Consequently, in the initial position the white \npieces and pawns are placed on the first and second ranks; the black pieces and pawns on \nthe eighth and seventh ranks. \n \nC.7 As a consequence of the previous rules, each of the sixty -four squares is invariably indicated \nby a unique combination of a letter and a number.'), -226.83512200984381), (Document(metadata={'creator': 'Acrobat PDFMaker 9.1 for Word', 'page': 17, 'total

In [None]:
for i in range(len(results)):
    print(f"""------------------------------------------------------------------------
result id: {results[i][0].metadata['page']} ; similarity score: {results[i][1]}
content: {results[i][0].page_content}""")

------------------------------------------------------------------------
result id: 17 ; similarity score: -226.83512200984381
content: C.6 The eight ranks (from bottom to top for White and from top to bottom for Black) are 
numbered 1, 2, 3, 4, 5, 6, 7, 8, respectively. Consequently, in the initial position the white 
pieces and pawns are placed on the first and second ranks; the black pieces and pawns on 
the eighth and seventh ranks. 
 
C.7 As a consequence of the previous rules, each of the sixty -four squares is invariably indicated 
by a unique combination of a letter and a number.
------------------------------------------------------------------------
result id: 17 ; similarity score: -226.83512200984381
content: C.6 The eight ranks (from bottom to top for White and from top to bottom for Black) are 
numbered 1, 2, 3, 4, 5, 6, 7, 8, respectively. Consequently, in the initial position the white 
pieces and pawns are placed on the first and second ranks; the black pieces and pawn

In [None]:
# insert / update chunks in db
collection.upsert(
    documents=[chunk.page_content for chunk in chunks],
    ids=ids)


In [None]:
# query the db
results = collection.query(
    query_texts= "how many pieces does each player start with?",
    n_results=2
)
# print(results)
# print(len(results))
for i in range(len(results['ids'][0])):
    print(f"""------------------------------------------------------------------------
result id: {results['ids'][0][i]} ; distance: {results['distances'][0][i]}
content: {results['documents'][0][i]}""")


------------------------------------------------------------------------
result id: data/laws_of_chess.pdf:17:2 ; distance: 1.0920754671096802
content: C.6 The eight ranks (from bottom to top for White and from top to bottom for Black) are 
numbered 1, 2, 3, 4, 5, 6, 7, 8, respectively. Consequently, in the initial position the white 
pieces and pawns are placed on the first and second ranks; the black pieces and pawns on 
the eighth and seventh ranks. 
 
C.7 As a consequence of the previous rules, each of the sixty -four squares is invariably indicated 
by a unique combination of a letter and a number.
------------------------------------------------------------------------
result id: data/laws_of_chess.pdf:2:1 ; distance: 1.1138403415679932
content: 3 
Article 2: The initial position of the pieces on the chessboard 
 
2.1 The chessboard is composed of an 8 x 8 grid of 64 equal squares alternately light (the 
‘white’ squares) and dark (the ‘black’ squares). 
The chessboard is placed b

In [None]:
CHROMA_PATH = "chroma"

PROMPT_TEMPLATE = """Answer the question based only on the following context:

{context}

-----------

Answer the question based on the above context: {question}
"""

In [None]:
# instantiate in-memory client
chroma_client = chromadb.Client()

In [None]:
docs = [
        "harley davidson",
        "tesla",
        "boeing",
        "suzuki",
        "ford",
        "triumph",
        "indian", 
        "harley",
        "davidson",
        "kawasaki"
    ]

collection.upsert(
    documents= docs,
    ids= [str(x + 1) for x in range(len(docs))]
    # ["id1", "id2", "id3", "id4", "id5", "id6", "id7"]
)

In [None]:
results = collection.query(
    query_texts= ["truck"],
    # n_results=2
)

print(results)

{'ids': [['1', '5', '2', '8', '3', '4', '9', '10', '6', '7']], 'embeddings': None, 'documents': [['harley davidson', 'ford', 'tesla', 'harley', 'boeing', 'suzuki', 'davidson', 'kawasaki', 'triumph', 'indian']], 'uris': None, 'included': ['metadatas', 'documents', 'distances'], 'data': None, 'metadatas': [[None, None, None, None, None, None, None, None, None, None]], 'distances': [[0.8810828328132629, 0.9143844842910767, 1.1023205518722534, 1.2331926822662354, 1.2681713104248047, 1.319493293762207, 1.354628562927246, 1.4447104930877686, 1.492676854133606, 1.5280427932739258]]}
