# Common Information – RAG Pipeline (Haystack)
Notebook ini membangun **pipeline RAG** untuk menjawab pertanyaan umum seputar proses jual–beli menggunakan data di koleksi **`common_information`**.

In [None]:
# --- Setup & Imports
import os
import dotenv
from typing import Dict
from haystack import Pipeline
from haystack.dataclasses import ChatMessage
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.builders import ChatPromptBuilder
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack.utils import Secret

# Load variables from a local .env file into the process environment.
dotenv.load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
MONGO_CONNECTION_STRING = os.environ.get("MONGO_CONNECTION_STRING")

# Validate with explicit raises instead of `assert`: assert statements are
# removed when Python runs with -O, which would let a missing key slip through
# and fail later with a far less obvious error.
if not OPENAI_API_KEY:
    raise RuntimeError("Harap set OPENAI_API_KEY di .env")
if not MONGO_CONNECTION_STRING:
    raise RuntimeError("Harap set MONGO_CONNECTION_STRING di .env")

# Names of the MongoDB database, collection and search indexes used by the store.
DB_NAME = "depato_store"
COLLECTION = "common_information"
VECTOR_INDEX = "vector_index_common_info"
FTS_INDEX = "search_index_common_info"

# The connection string is not passed explicitly here.
# NOTE(review): presumably the store reads MONGO_CONNECTION_STRING from the
# environment by default -- confirm against the integration's documentation.
common_store = MongoDBAtlasDocumentStore(
    database_name=DB_NAME,
    collection_name=COLLECTION,
    vector_search_index=VECTOR_INDEX,
    full_text_search_index=FTS_INDEX,
)

In [None]:
class CommonInformationRAGPipeline:
    """Retrieval-augmented question answering over a document store.

    Wires four Haystack components into one pipeline:
    ``embedder -> retriever -> prompt_builder -> generator``. The query is
    embedded, the closest documents are retrieved, rendered into a chat
    prompt, and sent to an OpenAI chat model.
    """

    # Model identifiers and retrieval depth used when building the pipeline.
    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
    CHAT_MODEL = "gpt-4.1-2025-04-14"
    TOP_K = 6

    def __init__(self, document_store):
        """Build and connect the pipeline components.

        Args:
            document_store: Store handed to ``MongoDBAtlasEmbeddingRetriever``
                as the source of retrieved documents.
        """
        self.pipeline = Pipeline()
        self.pipeline.add_component(
            "embedder",
            SentenceTransformersTextEmbedder(model=self.EMBEDDING_MODEL),
        )
        self.pipeline.add_component(
            "retriever",
            MongoDBAtlasEmbeddingRetriever(
                document_store=document_store,
                top_k=self.TOP_K,
            ),
        )
        self.pipeline.add_component(
            "prompt_builder",
            ChatPromptBuilder(
                variables=["query", "documents"],
                required_variables=["query", "documents"],
            ),
        )
        self.pipeline.add_component(
            "generator",
            OpenAIChatGenerator(
                model=self.CHAT_MODEL,
                # Reference the key by environment-variable name instead of
                # embedding its value: per Haystack's Secret documentation,
                # token-based secrets cannot be serialized with the pipeline.
                api_key=Secret.from_env_var("OPENAI_API_KEY"),
            ),
        )

        # Name both sockets on every connection so the wiring does not depend
        # on implicit type-based matching between components.
        self.pipeline.connect("embedder.embedding", "retriever.query_embedding")
        self.pipeline.connect("retriever.documents", "prompt_builder.documents")
        self.pipeline.connect("prompt_builder.prompt", "generator.messages")

    @staticmethod
    def _build_template():
        """Return the system and user chat messages used as the prompt template."""
        # The whitespace inside the triple-quoted string is part of the prompt
        # text sent to the model; do not re-indent it.
        return [
            ChatMessage.from_system(
                "Kamu adalah asisten dukungan pelanggan. Jawab singkat, jelas, dan akurat "
                "berdasarkan **dokumen** yang diretrieval. Jika tidak yakin, katakan tidak tahu."
            ),
            ChatMessage.from_user(
                """
                Pertanyaan: {{query}}

                Ringkas jawaban berdasarkan dokumen berikut (gunakan poin-poin bila cocok):
                {% for d in documents %}
                ---
                {{ d.content }}
                {% endfor %}

                Tampilkan bagian "Kebijakan Inti" jika relevan, dan jika ada langkah-langkah, tulis dalam bullet 1., 2., 3.
                """
            )
        ]

    def run(self, query: str) -> str:
        """Answer ``query`` from the retrieved documents.

        Args:
            query: The user's question. It is used both as the text to embed
                for retrieval and as the ``query`` template variable.

        Returns:
            The text of the first reply produced by the generator.

        Raises:
            TypeError: If ``query`` is not a string.
            ValueError: If ``query`` is empty or contains only whitespace.
        """
        # Reject unusable input up front rather than spending an embedding and
        # a chat-completion call on a question that carries no content.
        if not isinstance(query, str):
            raise TypeError(f"query must be a str, got {type(query).__name__}")
        if not query.strip():
            raise ValueError("query must not be empty or whitespace-only")

        result = self.pipeline.run(
            data={
                "embedder": {"text": query},
                "prompt_builder": {
                    "query": query,
                    "template": self._build_template(),
                },
            },
            include_outputs_from={"generator"},
        )
        return result["generator"]["replies"][0].text

# Build the pipeline instance on top of the `common_store` document store.
rag = CommonInformationRAGPipeline(document_store=common_store)

In [None]:
# Smoke test: ask a sample refund question and print the generated answer.
print(rag.run(query="Bagaimana proses refund di toko ini?"))