In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hardcode the access token in the notebook: read it from the HF_TOKEN
# environment variable and fall back to an interactive prompt when it is unset.
# Any token that was ever committed inside a notebook must be treated as leaked
# and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

from langchain_huggingface import HuggingFaceEndpoint

# Hosted model behind every `llm` call in this notebook.
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(repo_id=repo_id)

from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# Embedding model handed to the vector store when the documents are indexed.
embeddings = HuggingFaceInstructEmbeddings(
    query_instruction="Represent the query for retrieval: "
)

  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\Naveen\.cache\huggingface\token
Login successful
load INSTRUCTOR_Transformer
max_seq_length  512


In [7]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Load the PDF documents found under the local data directory.
PDF_DIR = './data'
loader = PyPDFDirectoryLoader(PDF_DIR)
docs = loader.load()

# Cut the loaded documents into overlapping chunks before embedding them.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(docs)

# Index the chunks in Chroma and expose the store as a retriever.
vectorstore = Chroma.from_documents(embedding=embeddings, documents=splits)
retriever = vectorstore.as_retriever()

In [29]:
from langchain_core.prompts import  PromptTemplate

from typing import Literal
from langchain.output_parsers import PydanticOutputParser

from langchain_core.pydantic_v1 import BaseModel, Field


# Data model
# Structured routing decision: RouteQuery is handed to PydanticOutputParser as
# its pydantic_object, so the router LLM's reply is parsed into this class.
# NOTE(review): the class docstring and the Field description presumably end up
# in the parser's format instructions (and therefore in the prompt) -- confirm
# before rewording either of them.
class RouteQuery(BaseModel):
    """Output if question is standalone or hydechain"""

    # Required field (no default); only the two literal labels are accepted.
    # choose_route() maps "standalone" -> standalone_chain and
    # "hydechain" -> hyde_chain.
    datasource: Literal["hydechain", "standalone"] = Field(
        ...,
        description="Determine whether to enter the process for handling declarative and normal sentences or interrogative and question sentences (indirect or direct questions) based on the user's query tone",
    )
    
# Parses the router LLM's reply into a RouteQuery instance.
parser = PydanticOutputParser(pydantic_object=RouteQuery)

# Routing prompt.
# - The labels named here must match RouteQuery.datasource exactly
#   ("hydechain" / "standalone"); asking for any other spelling makes the
#   reply fail validation or depend on the model guessing the right label.
# - The reply format is delegated entirely to {format_instructions}, so the
#   prompt no longer asks for a bare string and for parser-formatted output
#   at the same time.
# - The query is wrapped in the [Query] marks the instructions refer to.
prompt = PromptTemplate(
    template=(
        "As an AI assistant for application routing, classify the tone of the "
        "user's query inside [Query] marks.\n"
        "If it is a declarative or normal sentence, set \"datasource\" to \"hydechain\".\n"
        "If it is an interrogative sentence (a direct or indirect question), "
        "set \"datasource\" to \"standalone\".\n"
        "\n"
        "Notice:\n"
        "Do not answer the query or make up a query. Reply only in the format "
        "described below, without any instruction text, reasoning text, "
        "headlines, leading-text or other additional information.\n"
        "\n"
        "{format_instructions}\n"
        "\n"
        "[Query]{query}[Query]\n"
    ),
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)


# prompt -> LLM -> RouteQuery; the result is consumed by choose_route().
route_chain = prompt | llm | parser

In [34]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder


from langchain_core.output_parsers import StrOutputParser

# System instruction for the rewrite performed by hyde_chain.
hyde_system_text = (
    "Generate a different version of the text inside [Origin text] marks with the help of conversation history to retrieve relevant documents from a vector storage.\n"
    "The version is for better model comprehension while maintaining the original text sentiment and brevity.\n"
    "Your goal is to help the user overcome some of the limitations of the distance-based similarity search. \n"
    "Notice: Only return the reformulated statement without any instruction text, reasoning text, headlines, leading-text or other additional information."
)

# The user's text, wrapped in the marks the system instruction refers to.
hyde_human_text = "[Origin text]\n{query}\n[Origin text]"

prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(hyde_system_text),
        MessagesPlaceholder(variable_name="history"),
        HumanMessagePromptTemplate.from_template(hyde_human_text),
    ]
)

# prompt -> LLM -> plain string.
hyde_chain = prompt | llm | StrOutputParser()

In [35]:
# System instruction for turning a follow-up query into a standalone one.
standalone_system_text = (
    "Given a conversation history and a follow-up query inside [Query] marks, rephrase the follow-up query to be a standalone query. "
    "Do NOT answer the query, just reformulate it if needed, otherwise return it as is.\n"
    "Notice: Only return the final standalone query as simple as possible without any instruction text, headlines, leading-text or other additional information."
)

# The follow-up query, wrapped in the marks the system instruction refers to.
standalone_human_text = "[Query]{query}[Query]"

prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(standalone_system_text),
        MessagesPlaceholder(variable_name="history"),
        HumanMessagePromptTemplate.from_template(standalone_human_text),
    ]
)

# prompt -> LLM -> plain string.
standalone_chain = prompt | llm | StrOutputParser()

In [42]:
def choose_route(info):
    """Pick the query-rewriting chain named by the routing decision.

    Args:
        info: Mapping whose ``"next_step"`` entry exposes a ``datasource``
            attribute (the RouteQuery produced by ``route_chain``).

    Returns:
        ``standalone_chain`` when ``datasource`` contains ``"standalone"``,
        otherwise ``hyde_chain`` when it contains ``"hydechain"``.

    Raises:
        ValueError: If ``datasource`` contains neither label.
    """
    target = info["next_step"].datasource
    wants_standalone = "standalone" in target

    # Guard clause: reject anything that names neither route.
    if not wants_standalone and "hydechain" not in target:
        raise ValueError("Invalid next step")

    return standalone_chain if wants_standalone else hyde_chain

In [84]:
# System instruction for the final answer; {context} is filled by mid_chain.
answer_system_text = "Answer query inside [Query] marks solely based on the following context:\n{context}\n"

# The user's query, wrapped in the marks the system instruction refers to.
answer_human_text = "[Query]{query}[Query]"

prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(answer_system_text),
        MessagesPlaceholder(variable_name="history"),
        HumanMessagePromptTemplate.from_template(answer_human_text),
    ]
)

# Module-level log of everything that passed through return_source.
temp = list()


def return_source(x):
    """Record ``x`` in ``temp`` and hand it back unchanged.

    Each call appends a one-element list ``[x]`` to ``temp``, so the value
    captured by the k-th call is ``temp[k][0]``.
    """
    captured = [x]
    temp.append(captured)
    return x


def _format_docs(info):
    """Join the text of the retrieved documents for insertion into the prompt.

    Args:
        info: Chain state whose ``"context"`` entry is the list of documents
            returned by the retriever.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in info["context"])


# Pipeline:
#   1. route_chain decides which rewriting chain to use (-> "next_step").
#   2. choose_route returns that chain; its output is sent to the retriever
#      and the retrieved documents are stored under "context".
#   3. return_source records the state while "context" still holds the raw
#      documents, so their metadata stays available for inspection.
#   4. "context" is replaced by the documents' plain text; without this step
#      the prompt would receive the repr of a list of Document objects
#      (metadata and escaped newlines included) instead of readable text.
#   5. The answer prompt is rendered, sent to the LLM and parsed to a string.
mid_chain = (
        RunnablePassthrough.assign(next_step=route_chain)
        | RunnablePassthrough.assign(context=(RunnableLambda(choose_route) | retriever))
        | RunnableLambda(return_source)
        | RunnablePassthrough.assign(context=RunnableLambda(_format_docs))
        | prompt
        | llm
        | StrOutputParser()
    )

In [86]:
from operator import itemgetter
from typing import List

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import (
    ConfigurableFieldSpec
)
from langchain_core.runnables.history import RunnableWithMessageHistory


class InMemoryHistory(BaseChatMessageHistory, BaseModel):
    """Chat message history kept in a plain list on the instance."""

    messages: List[BaseMessage] = Field(default_factory=list)

    def add_message(self, message: BaseMessage) -> None:
        """Append a single message to ``messages``."""
        self.messages.append(message)

    def clear(self) -> None:
        """Discard the history by rebinding ``messages`` to a new empty list."""
        self.messages = list()


# Histories keyed by (user_id, conversation_id).
store = dict()


def get_session_history(user_id: str, conversation_id: str) -> BaseChatMessageHistory:
    """Return the history for a session, creating an empty one on first use.

    Args:
        user_id: First component of the session key.
        conversation_id: Second component of the session key.

    Returns:
        The ``InMemoryHistory`` stored in ``store`` under
        ``(user_id, conversation_id)``.
    """
    session_key = (user_id, conversation_id)
    try:
        return store[session_key]
    except KeyError:
        history = store[session_key] = InMemoryHistory()
        return history

In [89]:
# (id, display name, description) of each config field passed to
# get_session_history.
_session_key_fields = (
    ("user_id", "User ID", "Unique identifier for the user."),
    ("conversation_id", "Conversation ID", "Unique identifier for the conversation."),
)

# mid_chain wrapped with per-session message history: the "query" input is
# recorded as the incoming message and past messages are supplied as "history".
final_chain = RunnableWithMessageHistory(
    mid_chain,
    get_session_history=get_session_history,
    input_messages_key="query",
    history_messages_key="history",
    history_factory_config=[
        ConfigurableFieldSpec(
            id=field_id,
            annotation=str,
            name=label,
            description=blurb,
            default="",
            is_shared=True,
        )
        for field_id, label, blurb in _session_key_fields
    ],
)

In [90]:
# Session identifiers forwarded to get_session_history.
session_config = {
    "configurable": {"user_id": "user_id", "conversation_id": "conversation_id"}
}
question = {"query": "What is astute ai?"}

result = final_chain.invoke(question, config=session_config)

Parent run fc8f6393-27af-4013-9550-ba90ff84a717 not found for run 3ba02814-b75d-4429-b776-ac08f67e1791. Treating as a root run.


{'query': 'What is astute ai?', 'history': [], 'next_step': RouteQuery(datasource='standalone'), 'context': [Document(page_content='employees, send them dynamic messages via Slack, and help them take \naction in real time. “You may fail but you must come back stronger!”  \nResearch indicates that  62 to 73 percent of AI projects fail \nonly when the guidance to integrate Artificial Intelligence \nwith your business is shattered but guess what? we excel \nat it. Astute integrates AI with your business like no one \nelse can level it u p. But you can Smile; Astute is here!!! we \ntransform the pitfalls into blossoms! Collaborating with \nAstuteAi helps you address your problems in handling AI \nand chatbots. Astute enhances the Business process with \ninnovative generative AI Solutions. We trans form business \nworkflows and performances by integrating Artificial \nIntelligence.   \n \n \nAstuteAI is a platform where innovation meets efficiency; \nit’s not just a tool but a real Game -ch

In [107]:
# Display the answer returned by the final_chain.invoke(...) call.
result

' Astute AI is a platform that combines Machine Learning (ML) with a user-friendly, cloud-based SaaS platform to empower businesses to unleash the potential of their data, effortlessly integrate AI, and optimize processes within a single platform. It is accessible and scalable for businesses of all sizes.'

In [104]:
# Source files of the documents retrieved for the most recent invocation.
# return_source appends one [state] entry to `temp` per call, so index -1 is
# used: index 0 would keep showing the first captured query's sources after
# the chain has been invoked again.
{doc.metadata['source'] for doc in temp[-1][0]['context']}

{'data\\(1) The ROI of AI - ASTUTE.pdf',
 'data\\Astute Machine Learning.pdf',
 'data\\Astute SaaS and AI.pdf'}

In [114]:
# Messages recorded for the session used in the invoke call.
session_key = ('user_id', 'conversation_id')
store[session_key].messages

[HumanMessage(content='What is astute ai?'),
 AIMessage(content=' Astute AI is a platform that combines Machine Learning (ML) with a user-friendly, cloud-based SaaS platform to empower businesses to unleash the potential of their data, effortlessly integrate AI, and optimize processes within a single platform. It is accessible and scalable for businesses of all sizes.')]