In [11]:
import os
import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEndpoint
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
from dotenv import load_dotenv

In [2]:
load_dotenv()
HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
PC_KEY = os.getenv("PINECONE_API_KEY")

In [3]:
# Load pdf and split them into chunks:

def load_pdfs(path='Files/'):
    loader = PyPDFDirectoryLoader(path)
    documents = loader.load()
    pages = len(documents)
    print("Number of pages loaded: ", pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=50
    )
    texts = text_splitter.split_documents(documents)
    return texts

In [4]:
doc = load_pdfs()

Number of pages loaded:  112


In [5]:
model_name = "intfloat/e5-small-v2"
hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={
    "token":HF_TOKEN
    }
)



In [6]:
tests = ["This is a test document.", "This is another test document."]
print(hf_embeddings.embed_documents(tests)[0][:3], '...')

[-0.10857832431793213, 0.08348138630390167, 0.009349589236080647] ...


In [7]:
dims = len(hf_embeddings.embed_query("Test query."))
dims

384

In [8]:
repo_id="meta-llama/Meta-Llama-3-8B-Instruct"

llm=HuggingFaceEndpoint(repo_id=repo_id,
                        huggingfacehub_api_token=HF_TOKEN
                        )

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\jaksh\.cache\huggingface\token
Login successful


In [9]:
# VectorDB Initialization:

pc = pinecone.Pinecone(api_key=PC_KEY)

index_name = "project1"
index = pc.Index(index_name)

index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [10]:
index = PineconeVectorStore.from_documents(documents=doc, embedding=hf_embeddings, index_name=index_name)

In [12]:
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [13]:
def retrieve_answers(query):
    doc_search=index.similarity_search(query, k=2)
    response=chain.run(input_documents=doc_search,question=query)
    return response

In [14]:
query = "What offence does a person commit if he/she causes death by doing an act with the intention of causing death."
output = retrieve_answers(query)
print(output)

[Document(page_content='4thly .—If the person committing the act knows that it is so imminently  dangerous that it must, in all \nprobability, cause death, or such bodily injury as is likely to cause death, and commits such act without \nany excuse for incurring the risk of causing dea th or such injury as aforesaid.  \nIllustrations  \n(a) A shoots Z with the intention of killing him. Z dies in  consequence. A commits murder.  \n(b) A, knowing that Z is labouring under such a disease that a blow is likely to cause his death, strikes him with the intentio n \nof causing bodily injury. Z dies in consequence of the blow.  A is guilty of murder, although the blow might not have been \nsufficient in the ordinary course of nature to cause the death of a person in a sound state of health. But if A, not knowing that Z is', metadata={'page': 69.0, 'source': 'Files\\IPC_186045.pdf'}), Document(page_content="70 \n Illustrations  \n(a) A lays sticks and turf over a pit, with the intention of ther

  warn_deprecated(


 Murder. According to the passage, if the person committing the act knows that it is so imminently dangerous that it must, in all probability, cause death, and commits such act without any excuse for incurring the risk of causing death, then they have committed the offence of murder. (See illustration (a)). This is in contrast to culpable homicide, which is the offence of causing death with the intention of causing death, but not knowing that death was likely to result. (See illustration (b) and (c)). Therefore, the correct answer is Murder. Note that the passage does not mention the possibility of the person being unaware of the likely outcome, which is a key distinction between murder and culpable homicide.


In [17]:
query = "Page number of cupable homicide mentioned?"
output = retrieve_answers(query)
print(output)

[Document(page_content='297. Trespassing on burial places, etc.  \n298. Uttering words, etc., with deliberate intent to wound the religious feelings.  \n \nCHAPTER XVI  \nOF OFFENCESAFFECTINGTHE  HUMAN  BODY  \n \nOf offences affecting life  \n299. Culpable homicide.  \n300. Murder.  \nWhen culpable homicide is not murder.  \n301. Culpable homicide by causing death of person other than person whose death was intended.  \n302. Punishment for murder.  \n303. Punishment for murder by life -convict.  \n304. Punishment for culpable homicide not amounting to murder.  \n304A. Causing death by negligence.  \n304B. Dowry death.  \n305. Abetment of suicide of child or insane person.  \n306. Abetment of suicide.  \n307. Attempt to murder.  \nAttempts by life -convicts.  \n308. Attempt to commit culpable hom icide.', metadata={'page': 7.0, 'source': 'Files\\IPC_186045.pdf'}), Document(page_content="308. Attempt to commit culpable hom icide.  \n309. Attempt to commit suicide.  \n310. Thug.  \n311. 