Imports

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

Functions

In [None]:
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/gooaq")


def Read_PDF_LandChain(pdf_path="2018_IRC_1stptg.pdf",User_chunk_size=100,User_chunk_overlap=100):
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=User_chunk_size, 
        chunk_overlap=User_chunk_overlap  
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Total Chunks Created: {len(chunks)}")
    return chunks

def Return_retrieved_docs(retrieved_docs):
    if retrieved_docs:
        print(f" Retrieved {len(retrieved_docs)} documents!")
        for i, doc in enumerate(retrieved_docs):
            print(f"\n🔹 Chunk {i+1}:\n{doc.page_content}")
    else:
        print(" No documents retrieved!")

    return [doc.page_content for doc in retrieved_docs]


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [11]:
chunks=Read_PDF_LandChain(pdf_path="CricketLaws.pdf",User_chunk_size=100,User_chunk_overlap=0)

Total Chunks Created: 171


In [23]:
vector_store = Chroma.from_documents(
    chunks, 
    embedding_model, 
    persist_directory="./chroma_db"
)
doc_count = vector_store._collection.count()
print(doc_count)

55044


In [20]:
query="Is the bowler allowed to change ends?"
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
retrieved_docs = retriever.invoke(query)  

In [21]:
retrieved_chunks=Return_retrieved_docs(retrieved_docs)
Returned_retrieved_chunks="\n".join(retrieved_chunks)
print(Returned_retrieved_chunks)

✅ Retrieved 5 documents!

🔹 Chunk 1:
ground before actual delivery, the said bowler may run him out; but if the bowler throw at that

🔹 Chunk 2:
“Play”; from that time no trial ball shall be allowed to any bowler on the ground between the

🔹 Chunk 3:
motion, or if any part of his person be over or before the wicket, the striker shall not be out,

🔹 Chunk 4:
52 No umpire shall be changed during a match, unless with the consent of both sides, except in

🔹 Chunk 5:
35 After the ball shall have been finally settled i n the wicket-keeper’s or bowler’s hand, it
ground before actual delivery, the said bowler may run him out; but if the bowler throw at that
“Play”; from that time no trial ball shall be allowed to any bowler on the ground between the
motion, or if any part of his person be over or before the wicket, the striker shall not be out,
52 No umpire shall be changed during a match, unless with the consent of both sides, except in
35 After the ball shall have been finally settled i n th

In [25]:
import requests
import json

url = 'http://localhost:11434/api/generate'
payload = {
    "model": "llama3",
    "prompt": f"I will provide you text please do you use that text only to give answer for my query. Text:{Returned_retrieved_chunks}, query:{query}, "
}
data = json.dumps(payload)

response = requests.post(url, data=data, headers={'Content-Type': 'application/json'})

if response.status_code == 200:
    list_dict_words = []
    for each_word in response.text.split("\n"):
        try:
            data = json.loads(each_word) 
        except:
            pass
        list_dict_words.append(data)
        


llama_response = " ".join([word['response'] for word in list_dict_words if type(word) == type({})])
print(llama_response)

According  to  the  text ,  there  is  no  mention  of  the  bow ler  being  allowed  to  change  ends .  In  fact ,  it  suggests  that  once  a  bow ler  has  started  bowling  from  a  particular  end  ( after  " that  ' Play ' "),  they  are  not  allowed  to  throw  trial  balls  anymore .  There  is  no  provision  or  indication  in  this  text  that  allows  the  bow ler  to  change  ends  during  the  match .  
