In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from dotenv import load_dotenv
from langchain_openai import AzureOpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

In [2]:
# Load variables from a local .env file (if one exists) into the process
# environment. By default load_dotenv() does not override variables that are
# already set, so an externally configured environment keeps working as before.
load_dotenv()

# Fail early with a readable message. The previous
# `os.environ[name] = os.getenv(name)` round-trip was a no-op when the variable
# existed and raised "TypeError: str expected, not NoneType" when it did not.
_missing_azure_vars = [
    name
    for name in ("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT")
    if os.getenv(name) is None
]
if _missing_azure_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_azure_vars)
    )

# May be None if GROQ_API_KEY is not configured; it is passed to ChatGroq below.
groq_api_key = os.getenv('GROQ_API_KEY')

In [3]:
# Embedding client shared by every FAISS load and embeddings-based filter in
# this notebook. Presumably authenticates through the AZURE_OPENAI_* variables
# configured above — confirm against the langchain_openai documentation.
embeddings = AzureOpenAIEmbeddings(
    openai_api_version="2024-03-01-preview",
    azure_deployment="lala",
)

In [6]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Groq-hosted chat model handed to the multi-query retriever.
llm = ChatGroq(
    model_name="Llama3-8b-8192",
    groq_api_key=groq_api_key,
)

# Load the persisted FAISS index from the 'first__vector' directory.
# NOTE(review): allow_dangerous_deserialization=True should only be used with
# index files this project wrote itself.
database = FAISS.load_local(
    'first__vector',
    embeddings,
    allow_dangerous_deserialization=True,
)

# Wrap the plain vector-store retriever with the LLM-driven multi-query one.
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=database.as_retriever(),
)

### Multi-query retrieval

In [7]:
# Make the multi-query retriever log the alternative queries it generates.
import logging

MULTI_QUERY_LOGGER = "langchain.retrievers.multi_query"

logging.basicConfig()
logging.getLogger(MULTI_QUERY_LOGGER).setLevel(logging.INFO)

In [8]:
# Run the multi-query retriever on a sample question and keep the returned
# documents; the cell then displays how many came back.
# NOTE(review): in the recorded run the logged queries were about the Indian
# Patent Act, i.e. the model misread the abbreviation "IPC". Consider spelling
# out "Indian Penal Code" in the question.
unique_docs = retriever_from_llm.invoke("Tell me about diff IPC Sections")
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['Here are three alternative versions of the user question to retrieve relevant documents from a vector database:', '', 'Tell me about different IPC sections', '--------------------------------------------------', '', 'Alternative 1:', 'What are the main categories of intellectual property law in the Indian Patent Act?', '-------------------------------------------------', '', 'Alternative 2:', 'Provide information on the various sections of the Indian Patents Act, including the scope and application of each.', '-------------------------------------------------', '', 'Alternative 3:', 'Can you give me an overview of the various sections of the Indian Patent Act, such as patentability, patent infringement, and patent licensing?', '-------------------------------------------------', '', 'These alternative questions aim to capture different aspects of the original question, allowing for a more comprehensive search of the vector data

16

In [5]:
def answer(input):
    """Answer a question from the documents stored in the 'first__vector' index.

    A fresh Groq chat model, prompt, and FAISS-backed retriever are built on
    every call and combined into a retrieval chain.

    Args:
        input: The user's question (the name shadows the builtin ``input``;
            kept for compatibility with existing callers).

    Returns:
        The value stored under the 'answer' key of the chain's response.
    """
    qa_prompt = ChatPromptTemplate.from_template("""
    Answer the following question based only on the provided context. 
    Think step by step before providing a detailed answer. 
    I will tip you $1000 if the user finds the answer helpful. 
    <context>
    {context}
    </context>
    Question: {input}""")

    chat_model = ChatGroq(
        model_name="Llama3-8b-8192",
        groq_api_key=groq_api_key,
    )
    stuff_chain = create_stuff_documents_chain(chat_model, qa_prompt)

    # The index is reloaded from disk on each call.
    vector_store = FAISS.load_local(
        'first__vector',
        embeddings,
        allow_dangerous_deserialization=True,
    )
    rag_chain = create_retrieval_chain(vector_store.as_retriever(), stuff_chain)

    result = rag_chain.invoke({"input": input})
    return result['answer']

### Filter

In [9]:
# Reload the persisted index and expose it as a plain retriever; the
# compression retrievers in the following cells wrap this `retriever`.
database = FAISS.load_local(
    'first__vector',
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = database.as_retriever()

In [10]:
# Chat model used by the LLM-based compressors below.
# Candidate Groq model names listed by the author:
#   gemma-7b-it
#   mixtral-8x7b-32768
#   gemma2-9b-it
#   Llama3-8b-8192
llm = ChatGroq(
    model_name="gemma2-9b-it",
    groq_api_key=groq_api_key,
)

In [11]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [17]:
import pprint


def pretty_print_docs(docs):
    """Print each document's page_content under a "Document N:" heading.

    Defined here, ahead of its first use, so the notebook also works when run
    top to bottom on a fresh kernel. The identical definition in a later cell
    simply rebinds the same name.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.
    """
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )


# Compressor built from the chat model. Presumably it asks the LLM to keep only
# the passages relevant to the query — confirm against the LangChain docs.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "Tell me about all IPC Sections"
)
pretty_print_docs(compressed_docs)



Document 1:

>>>
Rupees Twenty five thousand. 379 -B. Snatching with hurt, wrongful restraint or fear of hurt. 
Whoever, in order to commit snatching, or in committing the s natching, causes hurt or wrongful 
restraint or fear of hurt; or after  committing the offence of snatching, causes hurt or wrongful 
restraint or fear of hurt in order to effect his escape, shall be punished with rigorous 
imprisonment which shall not be less than ten years but which may extend to fourteen years, 
and shall also  be liable to fine of Rupees Twenty five thousand.” [Vide G.S.R. 383(E), dated 29 -
5-2019 (w.e.f. 29 -5-2019).]  
 
IPC Section 380. Theft in dwelling house, etc .—Whoever commits theft in any building, tent or 
vessel, which building, tent or vessel is used as a human dwelling, or used for the custody of 
property, shall be punished with imprisonment of either description for a term which may 
extend to seven years, and shall also be liable to fine.
>>>
----------------------------------

In [18]:
# Helper function for printing docs


def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Consecutive documents are separated by a rule of 100 dashes on its own line.

    Args:
        docs: Iterable of objects exposing a string ``page_content`` attribute.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))


In [27]:
from langchain.retrievers.document_compressors import LLMChainFilter

# Use an LLM-backed filter as the compressor. Presumably it keeps or drops
# whole documents rather than trimming them — confirm against the LangChain docs.
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=_filter,
)

compressed_docs = compression_retriever.invoke("Tell me about all IPC sections")
pretty_print_docs(compressed_docs)

Document 1:

Rupees Twenty five thousand. 379 -B. Snatching with hurt, wrongful restraint or fear of hurt. 
Whoever, in order to commit snatching, or in committing the s natching, causes hurt or wrongful 
restraint or fear of hurt; or after  committing the offence of snatching, causes hurt or wrongful 
restraint or fear of hurt in order to effect his escape, shall be punished with rigorous 
imprisonment which shall not be less than ten years but which may extend to fourteen years, 
and shall also  be liable to fine of Rupees Twenty five thousand.” [Vide G.S.R. 383(E), dated 29 -
5-2019 (w.e.f. 29 -5-2019).]  
 
IPC Section 380. Theft in dwelling house, etc .—Whoever commits theft in any building, tent or 
vessel, which building, tent or vessel is used as a human dwelling, or used for the custody of 
property, shall be punished with imprisonment of either description for a term which may 
extend to seven years, and shall also be liable to fine.
------------------------------------------

In [20]:
# Display the raw id -> Document mapping held by the FAISS docstore.
# NOTE(review): `_dict` is a private attribute and this dumps every stored
# chunk. The recorded output contains personal contact details from an
# uploaded résumé — clear the output before sharing the notebook.
database.docstore._dict

{'58626aeb-8f45-4be2-b37b-0d9d16755510': Document(page_content='•President INK-IT Publication Club, GGSIPU EDC Nov 2021 - March 2023\n–Led a team of over 50 members in annual newspaper publication, ensuring adherence to deadlines and high-quality\nstandards.\nCERTIFICATIONS\n•Microsoft Azure issued on June 2024\nAzure AI Fundamentals\n•Deeplearning.AI issued on Feb 2024\nMachine Learning Modeling Pipelines in Production', metadata={'source': 'uploaded_pdfs\\Avinash_Kumar_Srivastava_USAR.pdf', 'page': 0}),
 '504d8bac-f72b-47ec-a486-0eda259a81ab': Document(page_content='Avinash Kumar Srivastava +91-9289690679\nBachelor of Technology avinashsri1605@gmail.com\nIndustrial Internet of Things GitHub\nGuru Gobind Singh Indraprastha University EDC Delhi LeetCode\nEnrollment No: 02719011721 LinkedIn\nEducation\n•Bachelor of Technology in Industrial Internet of Things 2021-25\nUNIVERSITY SCHOOL OF AUTOMATION AND ROBOTICS Surajmal Vihar New Delhi CGPA: 9.14\n•Intermediate Year: 2021\nIndira Ideal 

In [21]:
def delete_vector_database(index_path='first__vector'):
    """Remove every document from a persisted FAISS index and save it back.

    This is a best-effort cleanup: failures are printed, never raised.

    Args:
        index_path: Directory of the saved FAISS index. Defaults to
            'first__vector', the store used throughout this notebook.

    Returns:
        None.
    """
    try:
        # NOTE(review): allow_dangerous_deserialization=True should only be
        # used with index files this project wrote itself.
        db = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)

        # Snapshot the ids up front because delete() mutates the id mapping.
        doc_ids = list(db.index_to_docstore_id.values())
        if doc_ids:
            try:
                # One batched call instead of one delete per document.
                db.delete(doc_ids)
            except Exception as e:
                print(e)
                # st.error(f"Error deleting documents: {e}")

        # Persist the (now emptied) index back to the same location.
        db.save_local(index_path)
    except Exception as e:
        print(e)
        # st.error(f"Failed to delete vector database. Reason: {e}")

In [24]:
# Destructive: removes every document from the persisted 'first__vector' index.
# NOTE(review): on a top-to-bottom run this executes before the retrieval cells
# further down that load the same index — confirm it is meant to stay here.
delete_vector_database()

In [52]:
# Reload the index from disk and display the raw id -> Document mapping to see
# what it currently holds.
# NOTE(review): `_dict` is a private attribute and the whole store is dumped.
db = FAISS.load_local(
    'first__vector',
    embeddings,
    allow_dangerous_deserialization=True,
)
db.docstore._dict

{'1a658fdd-a560-4284-b1a6-e220fc227257': Document(page_content='Input-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nInput-Input Layer5\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>\nThe\nLaw\nwill\nnever\nbe\nperfect\n,\nbut\nits\napplication\nshould\nbe\njust\n-\nthis\nis\nwhat\nwe\nare\nmissing\n,\nin\nmy\nopinion\n.\n<EOS>\n<pad>Figure 5: Many of the attention heads exhibit behaviour that seems related to the structure of the\nsentence. We give two such examples above, from two different heads from the encoder self-attention\nat layer 5 of 6. The heads clearly learned to perform different tasks.\n15', meta

In [46]:
from langchain.retrievers.document_compressors import LLMChainFilter
database = FAISS.load_local('first__vector',embeddings, allow_dangerous_deserialization= True)
retriever=database.as_retriever()

_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=_filter, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "I live in Gujarat and snatching has been done with me, what punishment does the thief get"
)
pretty_print_docs(compressed_docs)

Document 1:

IPC section 379. Punishment for theft. - Whoever commits theft shall be punished with 
imprisonment of either description for a term which may extend to three years, or with fine, or 
with both  
. ► Bona fide removal of property. —The removal of property in the bona fide exercise of right 
is a good defence, Survari Sanyasi Apparao v. Boddepalli Lakshminarayana, 1961 SCC OnLine 
SC 68.  
STATE AMENDMENTS SECTION 379 -A AND SECTION 379 -B 
States have made amendments  to section 379 -A and 379 -B
----------------------------------------------------------------------------------------------------
Document 2:

IPC section 379. Punishment for theft. - Whoever commits theft shall be punished with 
imprisonment of either description for a term which may extend to three years, or with fine, or 
with both  
. ► Bona fide removal of property. —The removal of property in the bona fide exercise of right 
is a good defence, Survari Sanyasi Apparao v. Boddepalli Lakshminarayana, 1961 

In [45]:
# Embeddings-based filter: same query as the LLM filter, but the compressor is
# built from the embedding model instead of the chat model.
from langchain.retrievers.document_compressors import EmbeddingsFilter

database = FAISS.load_local(
    'first__vector',
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = database.as_retriever()

# presumably documents scoring below the threshold are dropped — TODO confirm
embeddings_filter = EmbeddingsFilter(
    similarity_threshold=0.76,
    embeddings=embeddings,
)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embeddings_filter,
)

compressed_docs = compression_retriever.invoke(
    "I live in Gujarat and snatching has been done with me, what punishment does the thief get"
)
pretty_print_docs(compressed_docs)

Document 1:

IPC section 379. Punishment for theft. - Whoever commits theft shall be punished with 
imprisonment of either description for a term which may extend to three years, or with fine, or 
with both  
. ► Bona fide removal of property. —The removal of property in the bona fide exercise of right 
is a good defence, Survari Sanyasi Apparao v. Boddepalli Lakshminarayana, 1961 SCC OnLine 
SC 68.  
STATE AMENDMENTS SECTION 379 -A AND SECTION 379 -B 
States have made amendments  to section 379 -A and 379 -B
----------------------------------------------------------------------------------------------------
Document 2:

IPC section 379. Punishment for theft. - Whoever commits theft shall be punished with 
imprisonment of either description for a term which may extend to three years, or with fine, or 
with both  
. ► Bona fide removal of property. —The removal of property in the bona fide exercise of right 
is a good defence, Survari Sanyasi Apparao v. Boddepalli Lakshminarayana, 1961 

In [71]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_text_splitters import CharacterTextSplitter

# Three stages, applied in list order: split on ". ", then the redundancy
# filter, then the relevance filter.
# EmbeddingsFilter comes from the import in the earlier embeddings-filter cell.
splitter = CharacterTextSplitter(
    separator=". ",
    chunk_size=1000,
    chunk_overlap=0,
)
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(
    similarity_threshold=0.76,
    embeddings=embeddings,
)

pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        splitter,
        redundant_filter,
        relevant_filter,
    ]
)

In [72]:
# Chat model for the final answer.
# Candidate Groq model names listed by the author:
#   gemma-7b-it
#   mixtral-8x7b-32768
#   gemma2-9b-it
#   Llama3-8b-8192
llm = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=groq_api_key,
)

# Prompt with a role section, numbered instructions, and the two placeholders
# ({context}, {input}) used by the chain built below.
prompt = ChatPromptTemplate.from_template(
"""
##ROLE##
You are an experienced AI Lawyer, specializing in providing legal guidance on various law-related queries. Your task is to understand the client's current case scenario and offer advice based on the given content.

##INSTRUCTIONS##
1) Answer the following question based only on the provided context.
2) Think step-by-step before providing a detailed answer.
3) Ensure the answer is thorough and helpful.

<context>
{context}
</context>
Question: {input}
"""
)

# Chain that takes the retrieved documents plus the question and calls the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)


In [73]:
# Reload the index and wrap its retriever with the compressor pipeline.
database = FAISS.load_local(
    'first__vector',
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = database.as_retriever()
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=pipeline_compressor,
)

# Full pipeline: compressed retrieval feeding the document chain.
retrieval_chain = create_retrieval_chain(compression_retriever, document_chain)
response = retrieval_chain.invoke(
    {"input": "I live in Gujarat and snatching has been done with me, what punishment does the thief get"}
)
response['answer']
# To inspect the compressed documents on their own instead of the final answer:
# compressed_docs = compression_retriever.invoke(
#     "I live in Gujarat and snatching has been done with me, what punishment does the thief get"
# )
# pretty_print_docs(compressed_docs)

"I'm an AI language model and can provide some general information on this topic. However, please note that I can't provide legal advice specific to your situation. I strongly recommend consulting with a local lawyer or the police for advice tailored to your case.\n\nIn Gujarat, as in other parts of India, theft is governed by the Indian Penal Code (IPC). Snatching can be considered a type of theft. The punishment for theft in India depends on the value of the stolen property and other factors.\n\n1. If the value of the stolen property is less than 500 rupees, the thief can be punished with simple imprisonment for up to 3 months, or a fine, or both (Section 379 of IPC).\n2. If the value of the stolen property is 500 rupees or more, but less than 1000 rupees, the thief can be punished with rigorous imprisonment for up to 6 months, or a fine, or both (Section 379 of IPC).\n3. If the value of the stolen property is 1000 rupees or more, the thief can be punished with rigorous imprisonment 

"Based on the provided context, if a theft has occurred in Gujarat, the thief would be punished under IPC (Indian Penal Code) section 379, which states that the punishment for theft can be imprisonment of either description for a term that may extend to three years, or with a fine, or both. However, the punishment can vary if the thief is tried under the state amendments section 379-A or 379-B.\n\nIn your case, since you mentioned that snatching has been done with you, it's important to note that snatching is considered a form of theft. The punishment for theft, including snatching, is outlined in IPC section 379.\n\nTo summarize, if a theft, including snatching, has occurred in Gujarat, the thief can be punished with imprisonment of up to three years, a fine, or both, according to IPC section 379. The punishment can be different if the thief is tried under the state amendments section 379-A or 379-B."

In [70]:
# Destructive: removes every document from the persisted 'first__vector' index.
# The following cell reloads the index to inspect what it holds afterwards.
delete_vector_database()

In [82]:
# Reload the index from disk and display the raw id -> Document mapping to see
# what it currently holds.
# NOTE(review): `_dict` is a private attribute and the whole store is dumped.
db = FAISS.load_local(
    'first__vector',
    embeddings,
    allow_dangerous_deserialization=True,
)
db.docstore._dict

{'3828639f-ca4c-4939-8a72-44e6ef386c94': Document(page_content='punishable under Section 377 of this Code, may be punished with 399[imprisonment for life].  \nIPC sectio n 389 . Putting person in fear of accusation of offence, in order to commit extortion. —\nWhoever, in order to the committing of extortion, puts or attempts to put any person in fear of \nan accusation, against that person or any other, of having committed, or attemp ted to commit, \nan offence punishable with death or with 400[imprisonment for life], or with imprisonment for a \nterm which may extend to ten years, shall be punished with imprisonment of either description \nfor a term which may extend to ten years, and shall  also be liable to fine; and, if the offence be \npunishable under Section 377 of this Code, may be punished with 401[imprisonment for life].', metadata={'source': 'uploaded_pdfs\\acts_of_theft.pdf', 'page': 5})}