In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

# laod all env variable
os.environ['HUGGINGFACE_TOKEN'] = os.getenv('HUGGINGFACE_TOKEN')
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')


from langchain_groq import ChatGroq
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


In [2]:
llm = ChatGroq(model='Llama3-8b-8192')
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001F4C6CFCD10>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001F4C6E31290>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [3]:
# load machine learning text file
loader = TextLoader('../1.3-textsplitter/machine-learning.txt')
docs = loader.load()

In [4]:
# split the text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='The Rise of Artificial Intelligence: Transforming the World as We Know It\nArtificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. From personalized recommendations on streaming platforms to sophisticated systems that can detect diseases or drive cars, AI is reshaping how we live, work, and interact with the world. But what exactly is AI, how does it work, and what does its rapid growth mean for our future?'),
 Document(metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content="What is Artificial Intelligence?\nAt its core, Artificial Intelligence refers to machines or software that mimic human intelligence. This includes the ability to learn from data, reason through problems, understand language, perceive their environment, and even exhibit creativity. AI doesn't refer to one single technology but rather a collection of technolo

In [5]:
# embedding all text doc
embedding = HuggingFaceEmbeddings(model='all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# embed data inside vector database
vector_store = Chroma.from_documents(documents, embedding)

In [8]:
vector_store.similarity_search("what is machine learning?")

[Document(id='121dfd81-f440-4cd3-8207-f7b61e72199a', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='How AI Works\nModern AI largely relies on a subset of computer science called machine learning (ML), where systems learn patterns from data and improve over time without being explicitly programmed. For instance, a machine learning model trained on thousands of cat images can learn to recognize cats in new images with surprising accuracy.'),
 Document(id='fa628a49-3a1d-42d5-b8c8-4cef28373a26', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='1. Healthcare\nAI is revolutionizing healthcare with applications ranging from early diagnosis to personalized treatment plans. Machine learning models can analyze medical images to detect diseases like cancer earlier and with greater accuracy than traditional methods. AI-powered chatbots and virtual assistants are also helping patients manage their health.'),
 Document(id='6aa518c9-1267-4

In [9]:
retriver = vector_store.as_retriever()
retriver.invoke("what is machine learning")

[Document(id='121dfd81-f440-4cd3-8207-f7b61e72199a', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='How AI Works\nModern AI largely relies on a subset of computer science called machine learning (ML), where systems learn patterns from data and improve over time without being explicitly programmed. For instance, a machine learning model trained on thousands of cat images can learn to recognize cats in new images with surprising accuracy.'),
 Document(id='fa628a49-3a1d-42d5-b8c8-4cef28373a26', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='1. Healthcare\nAI is revolutionizing healthcare with applications ranging from early diagnosis to personalized treatment plans. Machine learning models can analyze medical images to detect diseases like cancer earlier and with greater accuracy than traditional methods. AI-powered chatbots and virtual assistants are also helping patients manage their health.'),
 Document(id='620526ba-6777-4

In [10]:
prompt = ChatPromptTemplate.from_template(
    """
    You are an expert AI assistant. Answer the following question based only on the context below.
    
    <context>
    {context}
    </context>

    Question: {input}

    Answer in a clear, concise way.
    """
)

In [11]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriver, question_answer_chain)

In [12]:
response = rag_chain.invoke({"input": "whai is machine learning?"})

In [13]:
response

{'input': 'whai is machine learning?',
 'context': [Document(id='121dfd81-f440-4cd3-8207-f7b61e72199a', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='How AI Works\nModern AI largely relies on a subset of computer science called machine learning (ML), where systems learn patterns from data and improve over time without being explicitly programmed. For instance, a machine learning model trained on thousands of cat images can learn to recognize cats in new images with surprising accuracy.'),
  Document(id='620526ba-6777-4853-b357-56734eeefb4e', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='Another important subset of AI is deep learning, which uses artificial neural networks inspired by the human brain. Deep learning has enabled significant breakthroughs in areas such as speech recognition, computer vision, and natural language processing.\n\nKey components that power AI systems include:\n\nData: AI needs large volumes of d

In [14]:
response['answer']

'Based on the context, machine learning is a subset of computer science that enables systems to learn patterns from data and improve over time without being explicitly programmed.'

In [30]:
response = rag_chain.invoke({"input": "whai is deep learning?"})

In [31]:
response['answer']

'According to the context, deep learning is a subset of AI that uses artificial neural networks inspired by the human brain, enabling significant breakthroughs in areas such as speech recognition, computer vision, and natural language processing.'

#### Adding Chat History

In [15]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

In [16]:
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [17]:
history_aware_retriver = create_history_aware_retriever(llm, retriver, contextualize_q_prompt)
history_aware_retriver

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001F4E317FE90>, search_kwargs={}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessag

In [18]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

qa_prompt

ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.

In [19]:
chain = create_stuff_documents_chain(llm, qa_prompt)
retriver_rag_chain = create_retrieval_chain(history_aware_retriver, chain)

In [23]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}
def get_session_history(session_id:str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


with_message_history = RunnableWithMessageHistory(
        retriver_rag_chain, 
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )
config = {"configurable": {"session_id": "chat1"}}

In [24]:
response = with_message_history.invoke(
    {"input": "What is machine learning"},
    config=config
)
response


{'input': 'What is machine learning',
 'chat_history': [],
 'context': [Document(id='121dfd81-f440-4cd3-8207-f7b61e72199a', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='How AI Works\nModern AI largely relies on a subset of computer science called machine learning (ML), where systems learn patterns from data and improve over time without being explicitly programmed. For instance, a machine learning model trained on thousands of cat images can learn to recognize cats in new images with surprising accuracy.'),
  Document(id='fa628a49-3a1d-42d5-b8c8-4cef28373a26', metadata={'source': '../1.3-textsplitter/machine-learning.txt'}, page_content='1. Healthcare\nAI is revolutionizing healthcare with applications ranging from early diagnosis to personalized treatment plans. Machine learning models can analyze medical images to detect diseases like cancer earlier and with greater accuracy than traditional methods. AI-powered chatbots and virtual assistants are als

In [None]:
# from langchain_core.messages import AIMessage, HumanMessage

# chat_history1 = []

# questions = "what is machine learning"

# response1 = retriver_rag_chain.invoke({"input": questions, "chat_history": chat_history1})

# chat_history1.extend(
#     [
#         HumanMessage(content=questions),
#         AIMessage(content=response1['answer'])
#     ]
# )
# question2 = "Tell me about it"
# response2 = retriver_rag_chain.invoke({"input": question2, "chat_history": chat_history1})
# response2['answer']