In [1]:
## ChatPromptTemplate

In [7]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

In [None]:
## Inbuilt imports
import os ## use for doc extension, if already using
import pathlib # creates single var


# Text and PDF are covered; images (jpg, png) are also covered -- TODO: find out how.
from langchain_community.document_loaders import UnstructuredFileLoader 
# pdf

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from contanst import available_docs
from utils import blog
from time import time

def print_doc_list(docs=None):
    """Print a numbered menu of the selectable documents.

    Args:
        docs: Optional iterable of objects exposing a ``file_name`` attribute.
            When omitted, the module-level ``available_docs`` is listed.
    """
    if docs is None:
        docs = available_docs
    print("List of available documents:")
    # enumerate yields the 0-based position shown in brackets next to each name.
    for index, doc in enumerate(docs):
        print(f"[{index}] {doc.file_name}")
   
class DocumentReader:
    
    def __init__(self):
        # Initialising text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        print_doc_list()
        chosen_doc = int(input("Enter index of chosen document: "))
        self.path = available_docs[chosen_doc].file_path
        blog(f"File chosen -----> {self.path}")
                  
              
    # get the file extension
    def get_file_extension(self):
        return pathlib.Path(self.path).suffix
    
    # returns loaded document
    def get_document(self):
        file_ext = self.get_file_extension()
        match file_ext:
            case '.pdf':
                loader = UnstructuredFileLoader(self.path)               
            case '.txt':
                loader = UnstructuredFileLoader(self.path)
            case _:
                print('Format of the document is not supported')    
        return loader.load()
     
    # Splitting documents
    def split_documents(self):
        docs = self.get_document()
        return self.text_splitter.split_documents(docs)
        
            
    # creates vector embeddings and stores in vector store    
    def load_document(self,embeddings):
        docs = self.split_documents()
        start_time = time()
        vector_store =  Chroma.from_documents(documents=docs, embedding = embeddings)
        blog(f"Vector Store Creation time ----->{time() - start_time}")       
        return vector_store     
        
         

In [10]:
## need an LLM
from langchain_community.llms.ctransformers import CTransformers
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_huggingface import HuggingFaceEmbeddings

import os  # imported here so this cell runs on its own after a kernel restart

# Folder holding the locally downloaded models. Set the LOCAL_MODEL_DIR
# environment variable to point elsewhere; the default is the original
# machine-specific location. Named MODEL_DIR rather than `dir`, because
# assigning to `dir` hides the built-in dir() for the rest of the notebook.
MODEL_DIR = os.environ.get(
    "LOCAL_MODEL_DIR",
    r"D:\OneDrive - Adani\Desktop\LEARNING_FOLDER\_Kolkata_2024\1_LLM\local_downloaded_models",
)
file_name = "llama-2-7b-chat.Q6_K.gguf"
# The embedding model lives under the same root, so derive its path instead
# of repeating the absolute prefix.
EMBED_MODEL_DIR = os.path.join(MODEL_DIR, "embedding_models", "gte-base-en-v1.5")

# Local GGUF model; generated tokens are streamed to stdout by the callback.
# NOTE(review): Llama-2 models are usually run with a 4096-token context --
# confirm that context_length=16000 is intended.
llm = CTransformers(
    model=MODEL_DIR,
    model_file=file_name,
    callbacks=[StreamingStdOutCallbackHandler()],
    config={"context_length": 16000, "max_new_tokens": 3000},
)

# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# model folder to run at load time -- confirm the local copy is trusted.
embed_llm = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL_DIR,
    show_progress=True,
    model_kwargs={"trust_remote_code": True},
)



  from .autonotebook import tqdm as notebook_tqdm


In [12]:
## Question Answering Chain
# Single system message: {context} and {input} are the two template variables.
# The text is spelled out with explicit "\n" and indentation so the exact
# whitespace of the template is visible.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "\n"
            "         Answer the user's question from the following context: {context}\n"
            "         Question: {input} \n"
            "         ",
        ),
    ]
)

## creating stuff doc chain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
from langchain.chains.combine_documents.reduce import ReduceDocumentsChain
from langchain.chains.combine_documents.map_rerank import MapRerankDocumentsChain
from langchain.chains.combine_documents.refine import RefineDocumentsChain
from langchain.chains import create_history_aware_retriever

# Imported here so the fix is self-contained within this cell.
from langchain.chains.combine_documents import create_stuff_documents_chain

# Plain prompt -> model pipeline. This used to pipe `prompt`, which is not
# defined anywhere above this cell; the QA template built above is `qa_prompt`.
llm_chain = qa_prompt | llm

# "Stuff" chain: every document is formatted into the prompt's {context}
# variable and the filled prompt is sent to the model in a single call.
# NOTE(review): this replaces StuffDocumentsChain(llm_chain=...), which was
# being handed an LCEL pipeline; that class presumably expects an LLMChain.
chain = create_stuff_documents_chain(llm, qa_prompt, document_variable_name="context")

# Smoke test with no documents and an empty question. Both prompt variables
# must be supplied; "context" takes a list of documents.
chain.invoke({"context": [], "input": ""})

# TODO: create_history_aware_retriever needs three arguments
# (llm, retriever, prompt); the previous call passed only `llm` and could not
# run. Once a retriever and a chat-history-aware prompt are defined above this
# cell, build it with:
#     history_aware_retriever = create_history_aware_retriever(llm, retriever, rephrase_prompt)



'\nMapReduceDocumentsChain\nMapRerankDocumentsChain\nReduceDocumentsChain\nRefineDocumentsChain\nStuffDocumentsChain\n'

In [9]:
# Instruction for the question-rewriting step: if the latest question leans on
# the chat history, restate it so it stands alone; otherwise return it unchanged.
# Written with explicit "\n" and indentation so the exact whitespace is visible.
prompt = (
    "\n"
    "         Given a chat history and the latest user question, do this step by step, first check if\n"
    "         the latest user question references anything in the chat history context,if so then reformulate the latest \n"
    "         user question into a question that could be understood without the chat history. If the latest question\n"
    "         does not reference anything in the chat history, return that question as it is, without \n"
    "         any change.\n"
    "         "
)

# Message order: system instruction, then the prior turns, then the new question.
# A new query is to be generated if the question is related to the chat history.
contextual_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='\n         Given a chat history and the latest user question, do this step by step, first check if\n         the latest user question references anything in the chat history context,if so then reformulate the latest \n         user question into a question that could be understood without the chat history. If the latest question\n         does not reference anything in the chat history, return that question as it is, without \n         any change.\n         ')), MessagesPlaceholder(variable_name='chat_history'), HumanMessag

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='\n         Given a chat history and the latest user question, do this step by step, first check if\n         the latest user question references anything in the chat history context,if so then reformulate the latest \n         user question into a question that could be understood without the chat history. If the latest question\n         does not reference anything in the chat history, return that question as it is, without \n         any change.\n         ')), MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])
