In [4]:
#rag with the context only
from langchain_community.document_loaders import CSVLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

#adding history to rag
from langchain_core.messages import HumanMessage, AIMessage
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

from dotenv import load_dotenv
import os

# Load Documents

In [5]:
# Load every supported file (CSV, PDF) found in the 'data' directory into docs_list.
# sorted() keeps the load order reproducible; os.listdir() returns entries in
# arbitrary order.
list_dir = sorted(os.listdir('data'))
paths = [os.path.join('data', path) for path in list_dir]
docs_list = []

# The splitter settings are the same for every PDF, so build it once instead
# of once per file inside the loop.
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=200)

for url_path in paths:
    # Compare the real file extension, case-insensitively: 'REPORT.PDF' is
    # loaded, while a name that merely ends in the letters 'csv'/'pdf'
    # without a dot is skipped.
    extension = os.path.splitext(url_path)[1].lower()
    if extension == '.csv':
        # CSV documents are added as returned by the loader (no splitting).
        docs_list.extend(CSVLoader(url_path).load())
    elif extension == '.pdf':
        # PDF documents are split into overlapping chunks before being added.
        pdf_docs = PyPDFLoader(url_path).load()
        docs_list.extend(splitter.split_documents(pdf_docs))

In [6]:
# Sanity check: total number of loaded items (CSV documents plus split PDF chunks).
len(docs_list)

20887

# Retrieval

In [7]:
# Embedding model ('all-MiniLM-L6-v2') handed to Chroma in the cells below
# as the embedding function for the document index.
embedding_model = HuggingFaceEmbeddings(model='all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Build the persisted Chroma index only when it does not exist yet.
# Chroma.from_documents() adds to whatever is already stored under
# persist_directory, so re-running this cell unguarded would embed every
# document again and store duplicates. Delete the directory to force a rebuild
# (e.g. after the files in 'data' change).
chroma_dir = '.chroma_index'
if os.path.isdir(chroma_dir) and os.listdir(chroma_dir):
    # Index already on disk: open it instead of re-embedding.
    chroma_db = Chroma(persist_directory=chroma_dir, embedding_function=embedding_model)
else:
    chroma_db = Chroma.from_documents(docs_list, embedding_model, persist_directory=chroma_dir)

In [10]:
# Re-open the index from the persist directory written by the previous cell
# (same directory; the trailing slash is only a spelling difference), using
# the same embedding model for queries.
# NOTE(review): the recorded output echoes this line, which looks like a
# warning raised by this call — presumably a deprecation notice; confirm.
chroma_db = Chroma(persist_directory='.chroma_index/', embedding_function=embedding_model)

  chroma_db = Chroma(persist_directory='.chroma_index/', embedding_function=embedding_model)


# Context Integration

In [None]:
# load_dotenv() (python-dotenv) is expected to populate the process environment
# from a local .env file; the Groq key is then read from the environment.
# GROQ_API_KEY is None when the variable is not set.
load_dotenv()
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')

In [None]:
# Chat model served by Groq; used both to answer questions and (later) to
# rephrase follow-up questions for the history-aware retriever.
llm_model = ChatGroq(model='meta-llama/llama-4-scout-17b-16e-instruct', api_key=GROQ_API_KEY)

In [None]:
#get context from retriever 
prompt = ChatPromptTemplate.from_messages([
    ('system', 'You are a financial analyst who helps users understand financial data and answer queries.'),
    ('human', 'Here is the data:\n{context}\n\nQuestion: {input}')
])

In [None]:
# Combine the LLM with the prompt above into a documents chain; it is passed
# to create_retrieval_chain below as the answering step.
document_chain = create_stuff_documents_chain(llm_model, prompt)

In [None]:
# Expose the Chroma index as a retriever with default settings.
# NOTE: the name is misspelled ('retriver'), but later cells reference it
# under this spelling, so it is kept.
retriver = chroma_db.as_retriever()

In [None]:
# History-free RAG chain: retriever + documents chain. It is invoked below
# with an 'input' key and its result is read through the 'answer' key.
retriver_chain = create_retrieval_chain(retriver,document_chain)

# LLM Response

In [None]:
# Ask a first question through the history-free chain.
result = retriver_chain.invoke(
    dict(input='Give me insight about loan approvals for low income applicants.')
)

In [None]:
# Show only the generated answer text from the chain result.
print(result['answer'])

In [None]:
# Follow-up sent through the same history-free chain. Its prompt only has
# {context} and {input}, so the previous answer is not part of this request —
# this motivates the history-aware chain built in the next section.
result = retriver_chain.invoke(
    dict(input='You just gave me the answer, please make it more simple text')
)

In [None]:
# Show the answer to the follow-up asked without chat history.
print(result['answer'])

## RAG with History 
Alur: buat prompt template dengan history (`MessagesPlaceholder('chat_history')`) -> buat retriever history dengan menggabungkan retriever, LLM, dan history prompt (`create_history_aware_retriever`) -> buat prompt QA -> chain LLM dengan prompt (`create_stuff_documents_chain`) -> retrieval chain antara history retriever dan chain LLM-prompt (rag chain, `create_retrieval_chain`) -> untuk menggunakan chatbot, simpan history ke dalam list yang berisikan `AIMessage` dan `HumanMessage`.

In [None]:
# System instructions for the question-rephrasing step: the model must turn a
# follow-up into a standalone question using the chat history, without
# answering it. Used as the 'system' message of contextualized_system_prompt.
contextualized_system_prompt_template = """
You are a helpful assistant specialized in data analysis and answering questions about datasets.

Your job is to rephrase follow-up questions into fully standalone questions by using the previous chat history for context.

Only output the rephrased standalone question. Do not answer the question.
"""

In [None]:
# Prompt for the rephrasing step: system instructions, then the prior
# conversation (injected under the 'chat_history' key), then the new question.
contextualized_system_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', contextualized_system_prompt_template),
        MessagesPlaceholder('chat_history'),
        ('human', "{input}"),
    ]
)

In [None]:
# Retriever that is given the LLM and the rephrasing prompt in addition to the
# plain retriever, so follow-up questions can be rewritten with chat history
# before documents are fetched.
retriever_aware_history = create_history_aware_retriever(llm_model, retriver, contextualized_system_prompt)

In [None]:
# Answering prompt that sees both the chat history and the retrieved context:
# system role, prior conversation ('chat_history'), then the data and question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', 'You are a financial analyst who helps users understand financial data and answer queries.'),
        MessagesPlaceholder('chat_history'),
        ('human', 'Here is the data:\n{context}\n\nQuestion: {input}'),
    ]
)

In [None]:
# Chain the LLM with qa_prompt; this is the answering step of the
# history-aware RAG chain.
llm_context_chain = create_stuff_documents_chain(llm_model, qa_prompt)

In [None]:
# Full history-aware RAG chain: history-aware retriever + answering chain.
# Invoked below with 'input' and 'chat_history'; the answer is read from
# the 'answer' key of the result.
rag_chain = create_retrieval_chain(retriever_aware_history, llm_context_chain)

### Chat History in list
- hanya satu pengguna
- belum butuh penyimpanan dalam jangka panjang
- hanya satu arah

In [None]:
# First turn of the list-based conversation: start from an empty history.
chat_history = []

question = 'Give me insight about loan approvals for low income applicants.'
response = rag_chain.invoke(dict(input=question, chat_history=chat_history))
print(response['answer'])

# Record both sides of the turn so the next question can refer back to it.
chat_history.append(HumanMessage(content=question))
chat_history.append(AIMessage(content=response['answer']))

In [None]:
# Number of messages currently stored in the in-memory history list.
print(len(chat_history))

In [None]:
# Follow-up turn: the question only makes sense together with chat_history,
# which is passed to the history-aware chain alongside it.
question = 'Make it more simple'
response = rag_chain.invoke({
    'input' : question,
    'chat_history' : chat_history
})
print(response['answer'])

# Record this turn as well, as done for the first turn; otherwise later
# questions only see the first exchange and lose this follow-up.
chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=response['answer'])
])

In [None]:
# Ask a further question with the accumulated history passed along.
question = 'What is LLM ?'
response = rag_chain.invoke(dict(input=question, chat_history=chat_history))
print(response['answer'])

### Chat History with configuration Session id 

- membuat chat dapat menyimpan dan mengingat riwayat obrolan di tiap sesi
- dapat menyimpan dan memanggil riwayat dari tiap sesi tanpa bentrok
- RunnableWithMessageHistory
- cocok digunakan untuk aplikasi multi user (streamlit, fastapi)
- penyimpanan jangka panjang
- history otomatis terpanggil
- bebas berapa banyak history yang diingat

In [None]:
# In-memory registry of chat histories, keyed by session id.
store = {}


def get_config_history(session_id) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, so each session id
    maps to its own ChatMessageHistory for as long as ``store`` is kept.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register a fresh, empty history.
        store[session_id] = ChatMessageHistory()
        return store[session_id]

In [None]:
# Wrap the RAG chain so chat history is looked up per session through
# get_config_history, using these keys: 'input' for the user message,
# 'chat_history' for the prior messages, 'answer' for the model output.
rag_with_config_history = RunnableWithMessageHistory(
    rag_chain,
    get_config_history,
    input_messages_key='input',
    history_messages_key='chat_history',
    output_messages_key='answer',
)
rag_with_config_history

In [None]:
# Invocation config selecting which session's history to use.
config = dict(configurable=dict(session_id='chat1'))

In [None]:
# First question in session 'chat1'. Reuses the `config` dict defined in the
# cell above instead of repeating the same literal, so the session id is set
# in one place. The answer text is displayed as the cell output.
rag_with_config_history.invoke(
    {'input': 'What kind person to let them approve their loan'},
    config=config,
)['answer']

In [None]:
# Follow-up question in the same session. Reuses the `config` dict defined
# earlier so both calls share one session id; no history is passed here
# because the wrapper looks it up by session.
# Note: `response` holds the answer string here, not the full result dict.
response = rag_with_config_history.invoke(
    {'input': 'How the person requirements if their loan is approaved'},
    config=config,
)['answer']

print(response)