# RAG AI Implementation

Preparation:
1. OpenAI Token & Hugging Face Token
2. Reading PDF File using PyPDFLoader
3. Text Splitter Implementation

In [57]:

# import library
import pandas as pd
import numpy as np
import os
import glob
from dotenv import load_dotenv

# import visualization library
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

# imports for langchain, plotly and Chroma
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate

# Vector Store
from langchain_chroma import Chroma
from langchain_community.vectorstores import FAISS
from sklearn.manifold import TSNE
import numpy as np
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr

In [58]:
# Name reserved for the vector database.
db_name = "vector_db"

# Cost matters for our company, so the notebook uses a low-cost chat model.
MODEL = "gpt-4o-mini"

In [59]:
# Load environment variables from a file called .env (override=True is passed
# so that values from the file take precedence over ones already set).
load_dotenv(override=True)

# Fail fast with an actionable message instead of exporting a placeholder key:
# a fake key hides the misconfiguration until the first OpenAI call and invites
# pasting a real secret into the notebook.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [60]:
# List the knowledge-base folders; each folder name becomes a document type.
# Keep directories only, because the glob also matches plain files (for
# example a stray .DS_Store) and DirectoryLoader expects a directory.
# Sorting makes the document order the same on every run.
folders = sorted(
    path for path in glob.glob("knowledge-bases/*") if os.path.isdir(path)
)

documents = []

for folder in folders:
    doc_type = os.path.basename(folder)

    # Load every PDF found anywhere below this folder with PyPDFLoader.
    loader = DirectoryLoader(
        folder,
        glob="**/*.pdf",
        loader_cls=PyPDFLoader,
    )

    # Tag each loaded document with the folder it came from so the type can be
    # recovered later from the chunk metadata.
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)


## Text Splitter

In [61]:
# Split the loaded documents into smaller, slightly overlapping chunks.
# NOTE(review): CharacterTextSplitter splits on a single separator, so chunks
# may end up larger than chunk_size when the separator is rare in the PDF
# text - confirm against the LangChain docs before relying on the 500 limit.
text_splitter = CharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = text_splitter.split_documents(documents)

print(f"Total chunks created: {len(chunks)}")

Total chunks created: 12


In [62]:
# Show which folders (document types) the split chunks came from.
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}

# Sort before printing: iteration order of a set of strings can differ between
# interpreter runs, which would make this cell's output non-reproducible.
print(f"Document types found: {', '.join(sorted(doc_types))}")

Document types found: service, suku_cadang, tips


## Vector Embedding

### FAISS

In [63]:
# Embedding model, constructed with its default settings.
embeddings = OpenAIEmbeddings()

# Embed every chunk and index the resulting vectors in a FAISS vector store.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)

# Read the vector count and the vector dimensionality off the FAISS index.
total_vectors, dimensions = vectorstore.index.ntotal, vectorstore.index.d

print(f"There are {total_vectors} vectors with {dimensions:,} dimensions in the vector store")

There are 12 vectors with 1,536 dimensions in the vector store


In [64]:
# Prework: pull the stored vectors and their metadata back out of the FAISS
# store so they can be plotted.
# NOTE(review): `documents` and `doc_types` are rebound here to plain lists of
# strings. Earlier cells use the same names for the loaded Document objects and
# the set of types, so re-running the splitting cell after this one requires
# re-running the loading cell first.
vectors = []
documents = []
doc_types = []
colors = []
color_map = {'service':'blue', 'suku_cadang':'green', 'tips':'red'}

for i in range(total_vectors):
    # Rebuild the i-th stored vector from the index.
    vectors.append(vectorstore.index.reconstruct(i))
    # Map the index position to its docstore id, then fetch the document.
    doc_id = vectorstore.index_to_docstore_id[i]
    document = vectorstore.docstore.search(doc_id)
    documents.append(document.page_content)
    doc_type = document.metadata['doc_type']
    doc_types.append(doc_type)
    # Folders are discovered dynamically, so a type missing from color_map is
    # drawn in gray instead of aborting the cell with a KeyError.
    colors.append(color_map.get(doc_type, 'gray'))

vectors = np.array(vectors)
vectors

array([[ 0.00864471,  0.01786574,  0.00725084, ..., -0.01486354,
         0.00435586, -0.01487695],
       [-0.00504461,  0.01027022, -0.00728703, ...,  0.00393513,
         0.00428708, -0.02134489],
       [ 0.01929165,  0.02299543,  0.00043008, ..., -0.0016329 ,
         0.0105318 , -0.00758077],
       ...,
       [ 0.0021859 ,  0.00797249,  0.00551212, ...,  0.02195701,
        -0.0135663 , -0.02053242],
       [-0.00641842, -0.00936196,  0.0061772 , ...,  0.00885996,
        -0.01148731, -0.02395254],
       [ 0.00456326,  0.00156284, -0.00356125, ...,  0.01155174,
         0.00164501, -0.00900525]], shape=(12, 1536), dtype=float32)

In [65]:
# # saving vectorstore
# vectorstore.save_local("faiss_index")

### Visualizing the Vectors

In [66]:
# Visualizing the vectors in 2D: reduce the embeddings to two components with
# t-SNE (random_state is fixed) and plot one marker per chunk.
tsne = TSNE(n_components=2, perplexity=5, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot; hovering a marker shows the document type and
# the first 100 characters of the chunk text.
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='2D FAISS Vector Store Visualization',
    # `scene` only configures 3D plots; a 2D Scatter takes its axis titles
    # from the regular x/y axes, so set them there.
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [67]:
# Now in 3D: reduce the embeddings to three components with t-SNE.
tsne = TSNE(n_components=3, perplexity=5, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Build the 3D scatter plot; each marker's hover label shows the document type
# and the first 100 characters of the chunk text.
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=reduced_vectors[:, 0],
            y=reduced_vectors[:, 1],
            z=reduced_vectors[:, 2],
            mode='markers',
            marker={'size': 5, 'color': colors, 'opacity': 0.8},
            text=[
                f"Type: {kind}<br>Text: {body[:100]}..."
                for kind, body in zip(doc_types, documents)
            ],
            hoverinfo='text',
        )
    ]
)

fig.update_layout(
    title='3D FAISS Vector Store Visualization',
    scene={'xaxis_title': 'x', 'yaxis_title': 'y', 'zaxis_title': 'z'},
    width=900,
    height=700,
    margin={'r': 20, 'b': 10, 'l': 10, 't': 40},
)

fig.show()

## RAG Implementation in Code

In [68]:
# # create a new Chat with OpenAI
# llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# # set up the conversation memory for the chat
# # memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# # the retriever is an abstraction over the VectorStore that will be used during RAG
# retriever = vectorstore.as_retriever()

# # putting it together: set up the conversation chain with the LLM selected by MODEL, the vector store and memory
# # conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
# qa_chain = RetrievalQA.from_chain_type(
#         llm=llm,
#         chain_type="stuff",
#         retriever=retriever,
#         return_source_documents=True)

In [69]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_openai import ChatOpenAI

# Chat model used to generate the answers.
llm = ChatOpenAI(temperature=0, model=MODEL)

# Conversation memory; its keys match the names used in the chain inputs.
memory = ConversationBufferMemory(
    return_messages=True,
    input_key="input",
    memory_key="chat_history",
)

# Prompt: a fixed system message plus one human message that carries the chat
# history, the retrieved context and the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human", "{chat_history}\n\nContext:\n{context}\n\nUser: {input}"),
    ]
)

# Retriever view over the vector store, then the two chains: one that feeds
# documents into the prompt, and one that adds retrieval in front of it.
retriever = vectorstore.as_retriever()
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Single-turn example: the history is read from memory as a string before the
# call, and the new exchange is written back afterwards.
query = "Merawat motor saat banjir"
history_str = memory.buffer_as_str

result = retrieval_chain.invoke({"input": query, "chat_history": history_str})

memory.save_context({"input": query}, {"output": result["answer"]})

print(result["answer"])


Merawat motor setelah terendam banjir sangat penting untuk menjaga performa dan keamanannya. Berikut adalah langkah-langkah yang dapat Anda lakukan untuk merawat motor Anda setelah terendam banjir:

1. **Cek dan Keringkan Filter Udara**: 
   - Lepaskan filter udara dan periksa kondisinya. Jika filter basah, keringkan dengan baik. Jika perlu, ganti dengan filter udara baru yang sesuai.

2. **Bersihkan dan Lumasi Rantai**: 
   - Jika motor Anda jenis sport atau cub, bersihkan rantai dan lumasi kembali setelah terpapar air banjir. Ini penting untuk mencegah karat dan kerusakan pada rantai.

3. **Periksa Rem**: 
   - Cek kondisi kampas rem dan cakram. Pastikan tidak ada lumpur atau kotoran yang mengganggu sistem pengereman. Kotoran yang menempel dapat merusak komponen rem dan mengurangi efektivitas pengereman.

4. **Pastikan Knalpot Tidak Terisi Air**: 
   - Nyalakan mesin dan gas sedikit untuk memastikan tidak ada air yang tersisa di dalam knalpot. Miringkan motor agar air bisa keluar, at

## RAG Implementation using Gradio Interface

In [70]:
import gradio as gr

def gradio_history_to_str(history):
    """Flatten a Gradio chat history into a plain-text transcript.

    Both history layouts that Gradio can hand to a chat function are accepted:

    * pair format: a sequence of ``(user, bot)`` pairs, each rendered as
      ``"User: ...\\nAssistant: ..."``;
    * messages format: a sequence of dicts with ``"role"`` and ``"content"``
      keys, each rendered as one ``"User: ..."`` / ``"Assistant: ..."`` line.

    Args:
        history: The chat history, or ``None`` / empty for a new conversation.

    Returns:
        The transcript with entries joined by newlines; ``""`` when there is
        no history.
    """
    role_labels = {"user": "User", "assistant": "Assistant"}
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages format: unpacking the dict as a pair would yield its
            # keys (or fail when extra keys are present), so read the fields
            # explicitly.
            role = turn.get("role")
            label = role_labels.get(role, str(role))
            lines.append(f"{label}: {turn.get('content')}")
        else:
            user, bot = turn
            lines.append(f"User: {user}\nAssistant: {bot}")
    return "\n".join(lines)

def chat(message, history):
    """Answer one user message through the retrieval chain.

    The conversation context passed to the chain is built from the ``history``
    argument supplied by Gradio. The exchange is also saved to the module-level
    ``memory`` object, but this function never reads it back.

    Args:
        message: The user's latest message.
        history: The chat history supplied by Gradio.

    Returns:
        The chain's answer text.
    """
    payload = {
        "input": message,
        "chat_history": gradio_history_to_str(history),
    }
    response = retrieval_chain.invoke(payload)
    answer = response["answer"]
    memory.save_context({"input": message}, {"output": answer})
    return answer

# Build the chat UI with ChatInterface's default settings (no explicit
# type="messages"), then launch it with inbrowser=True.
view = gr.ChatInterface(
    fn=chat,
    title="RAG Chatbot (with Memory, Modern LangChain)",
)
view.launch(inbrowser=True)


* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




In [71]:
# # Wrapping that in a function

# def chat(message, history):
#     result = qa_chain.invoke({"query": message})
#     return result["result"]

In [72]:
# # And in Gradio:

# view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)