In [2]:
import os
from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings
from langchain.prompts import PromptTemplate
from langchain.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain.chains import RetrievalQA
from appConfig import *
from pymongo import MongoClient
from langchain.document_loaders.pdf import PyPDFLoader
print(MONGO_DB_URL)

mongodb+srv://dhruv20345:1FRlKSJs6zPNmdq9@cluster0.agirsje.mongodb.net


In [3]:

class EmbeddingGenerator:
    client = None
    def __init__(self, repo_id):
        if EmbeddingGenerator.client is None: EmbeddingGenerator.client = MongoClient(MONGO_DB_URL)
        self.embedding_model = HuggingFaceHubEmbeddings(repo_id=repo_id)
    
    def generate_embeddings(self,file_path,collection_name:str):
        loader = PyPDFLoader(file_path)
        pages = loader.load_and_split()
        if EmbeddingGenerator.client[MONGO_DB_NAME_CACHE][collection_name].find_one({"src_file_name":os.path.basename(file_path)}):
            print("vectors already exist in mongodb")
        else:
            EmbeddingGenerator.client[MONGO_DB_NAME_CACHE][collection_name].insert_one({"src_file_name":os.path.basename(file_path)})
            MongoDBAtlasVectorSearch.from_documents(documents=pages, embedding=self.embedding_model, collection=EmbeddingGenerator.client[MONGO_DB_NAME][collection_name])
            print("vectors stored in mongodb")

In [1]:

class RetrievalQAGenerator:
    def __init__(self, EMBEDDING_MODEL, DB_COLLECTION_NAME="general"):
        load_vectors = MongoDBAtlasVectorSearch.from_connection_string(
            connection_string = MONGO_DB_URL,
            namespace = MONGO_DB_NAME + "." + DB_COLLECTION_NAME,
            embedding = EMBEDDING_MODEL,
        )
        self.qa_retriever = load_vectors.as_retriever(search_type="similarity",search_kwargs={"k": 25})
        
        template = """
        You're helpful AI assistant given the task to help people seeking law advice.
        You have to help a person to use the Indian laws in a legal manner.
        Answer in step by step points by highlighting the sections of Indian laws & constitution.
        Refuse to answer if it is not helping in legal affairs, also do not conceal anything.
        Deny to answer the question if it is not provided in the text.
        {context}
        
        Question: {question} including section number and all related details.
        Answer:"""
        self.prompt = PromptTemplate(template=template, input_variables=["context", "question"])
        self.llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", temperature=0.8, max_new_tokens=4096)

    def generate_retrieval_qa_chain(self):
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.qa_retriever,
            chain_type_kwargs={"prompt": self.prompt},
        )


In [None]:

class Main:
    qa_chain = None
    embedding_generator = None
    def __init__(self) -> None:
        if Main.embedding_generator is None:
            Main.embedding_generator = EmbeddingGenerator(repo_id="sentence-transformers/all-MiniLM-L6-v2")
        if Main.qa_chain is None:
            qa_generator = RetrievalQAGenerator(EMBEDDING_MODEL=Main.embedding_generator.embedding_model)
            Main.qa_chain = qa_generator.generate_retrieval_qa_chain()
    
    def generate_embedding(self,file_path,collection_name="general"):
        Main.embedding_generator.generate_embeddings(file_path,collection_name)

    def ask_question(self, question: str):
        try:
            response = Main.qa_chain({"query":question, "early_stopping":True,"min_length":2000,"max_tokens":5000})
            return response["result"]
        except Exception as e:
            return "Retry to ask question!, An error message: "+ str(e)

In [5]:
model = Main()
model.generate_embedding("D:\Files\LLM\Project\DataSourceFiles\IPC_186045.pdf")

  from .autonotebook import tqdm as notebook_tqdm


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Dhruv\.cache\huggingface\token
Login successful
vectors stored in mongodb


In [6]:
# print(model.ask_question("punishment for robbery?"))

  warn_deprecated(


 

        1. The Indian Penal Code, 1860, defines robbery in Section 390. It states that, "whoever, in order to commit extortion, puts any person in fear of death or of hurt, or in fear of any injury to the person or reputation of such person or any relative or friend of his or anything which is believed by such person to be in the possession of such person, is said to commit "robbery".
        
        2. The punishment for robbery is given in Section 392 of the Indian Penal Code. It states that, "whoever commits robbery shall be punished with rigorous imprisonment for a term which may extend to ten years, and shall also be liable to fine."

        3. If the robbery is committed on the highway between sunset and sunrise, the punishment is more severe. Section 393 of the Indian Penal Code states that, "whoever, in committing or attempting to commit robbery, uses force or violence upon, or threatens to use such force or violence upon, any person, or property in the possession of, or i