# RAG AI Agent

In [1]:
import hashlib
import json
import logging
import os
import re
import uuid
from pathlib import Path
from pprint import pprint
from typing import List, Any
from urllib.parse import quote

import numpy as np
import plotly.graph_objects as go
import redis
import requests
import torch
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.schema import Document
from langchain.tools import tool
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.messages import BaseMessage
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.vectorstores.base import VectorStoreRetriever
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from pydantic import BaseModel, Field
from scipy.spatial.distance import cosine
from transformers import BertTokenizer, BertModel

logging.basicConfig(level=logging.DEBUG)


  from .autonotebook import tqdm as notebook_tqdm


## Configuration

In [2]:
# Directory scanned for the source *.txt knowledge files.
DATA_PATH = "../data/"
# OpenAI chat / embedding model names, used only when WITH_OLLAMA is False.
OPENAI_MODEL = "gpt-4o-mini"
OPENAI_EMBEDDINGS_MODEL = "text-embedding-3-large"
# Ollama chat / embedding model names, used when WITH_OLLAMA is True.
OLLAMA_MODEL = "adijayainc/bhsa-llama3.2"
OLLAMA_EMBEDDINGS_MODEL = "snowflake-arctic-embed2"
# On-disk location and collection name of the Chroma vector store.
CHROMA_PATH = "./chroma_langchain_db"
CHROMA_COLLECTION = "juragan_klod_collection"
# Base URL of the product/order HTTP API called by the agent tools.
API_URL = "http://localhost:5050"
# Cosine-similarity cut-off above which two questions are treated as the same for caching.
QUESTION_THRESHOLD = 0.90
# Backend switch: True selects the Ollama models, False the OpenAI ones.
WITH_OLLAMA = True

## Describe LLM Model & Embeddings

In [3]:
class LLMModel:
    """Factory for the chat model and embedding model selected by ``WITH_OLLAMA``."""

    @staticmethod
    def llm():
        """Return the chat model for the configured backend.

        Returns:
            A ``ChatOpenAI`` instance when ``WITH_OLLAMA`` is false, otherwise a
            ``ChatOllama`` instance. Both use temperature 0.5 and a 500-token
            generation cap.
        """
        if not WITH_OLLAMA:
            return ChatOpenAI(model=OPENAI_MODEL, temperature=0.5, max_tokens=500)

        # ChatOllama has no ``max_tokens`` field, so passing it was silently
        # ignored; ``num_predict`` is the Ollama option that caps output tokens.
        return ChatOllama(model=OLLAMA_MODEL, temperature=0.5, num_predict=500)

    @staticmethod
    def embeddings():
        """Return the embedding model for the configured backend."""
        if not WITH_OLLAMA:
            return OpenAIEmbeddings(model=OPENAI_EMBEDDINGS_MODEL)

        return OllamaEmbeddings(model=OLLAMA_EMBEDDINGS_MODEL)

    @staticmethod
    def bind_tools(llm, tools):
        """Attach ``tools`` to ``llm`` as a plain ``tools`` attribute and return ``llm``.

        NOTE(review): this only sets an attribute; it does not call the model's
        own ``bind_tools``. It is not called anywhere in this notebook — confirm
        it is still needed.
        """
        setattr(llm, 'tools', tools)
        return llm

## Chunk RAG Document

In [4]:
class VectorDocument:
    """Loads the tagged knowledge files and splits them into per-topic documents.

    Topic titles are marked ``[T]...[/T]`` and topic bodies ``[PC]...[/PC]``;
    titles and bodies are paired by their position in the file.
    """

    def __init__(self):
        # Result of the most recent chunk_documents_by_subtopic() call.
        self.new_documents = []

    def load_documents(self) -> List[Document]:
        """Load the files matching the ``*.txt`` glob under ``DATA_PATH``."""
        loader = DirectoryLoader(DATA_PATH, glob="*.txt")
        return loader.load()

    def split_by_subtopics(self, text: str) -> List[str]:
        """Return the text of every ``[T]...[/T]`` span, in document order."""
        subtopic_pattern = r"\[T\](.*?)\[/T\]"
        return re.findall(subtopic_pattern, text, re.DOTALL)

    def split_by_content(self, text: str) -> List[str]:
        """Return the text of every ``[PC]...[/PC]`` span, in document order."""
        content_pattern = r"\[PC\](.*?)\[/PC\]"
        return re.findall(content_pattern, text, re.DOTALL)

    def format_list_items(self, content: str) -> str:
        """Put each ``- item`` on its own line and strip surrounding whitespace.

        A newline is inserted before every hyphen that is followed by
        whitespace, so inline lists such as ``a - b - c`` become one item per
        line. Hyphens inside words (``auto-scaling``) are left alone.
        """
        return re.sub(r"(-\s.*?)(?=\s*-|\n|$)", r"\n\1", content).strip()

    def chunk_documents_by_subtopic(self, documents: List[Document]) -> List[Document]:
        """Split each document into one ``Document`` per title/body pair.

        Args:
            documents: Loaded source documents containing ``[T]``/``[PC]`` tags.

        Returns:
            The new documents (also stored on ``self.new_documents``). Each has
            ``page_content`` of ``"<topic>\\n<formatted body>"`` and a copy of
            the source metadata extended with a ``"topic"`` key.
        """
        self.new_documents = []

        for doc in documents:
            metadata = doc.metadata
            page_content = doc.page_content

            subtopics = self.split_by_subtopics(page_content)
            content_blocks = self.split_by_content(page_content)

            # zip() below drops unpaired entries; surface that instead of
            # silently losing content when a file's tags are unbalanced.
            if len(subtopics) != len(content_blocks):
                logging.getLogger(__name__).warning(
                    "%s: %d [T] titles but %d [PC] bodies; unpaired entries are skipped",
                    metadata.get("source"), len(subtopics), len(content_blocks),
                )

            formatted_content = [self.format_list_items(content) for content in content_blocks]

            for subtopic, content in zip(subtopics, formatted_content):
                # Only the first line of the [T] span is used as the topic name.
                topic = subtopic.split("\n", 1)[0].strip()

                # Copy so the source document's metadata is not mutated.
                new_metadata = metadata.copy()
                new_metadata["topic"] = topic

                self.new_documents.append(
                    Document(page_content=f"{topic}\n{content}", metadata=new_metadata)
                )

        return self.new_documents

## Test Document Chunk

In [5]:
# Load the source files and split them into one Document per [T]/[PC] pair.
vector_document = VectorDocument()
new_doc = vector_document.chunk_documents_by_subtopic(vector_document.load_documents())

# Print the resulting chunks for a visual check.
pprint(new_doc)

DEBUG:langchain_community.document_loaders.directory:Processing file: ../data/kubernetes.txt
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): packages.unstructured.io:443
DEBUG:urllib3.connectionpool:https://packages.unstructured.io:443 "GET /python-telemetry?version=0.16.17&platform=Darwin&python3.11&arch=arm64&gpu=False&dev=false HTTP/1.1" 302 None
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): unstructured.io:443
DEBUG:urllib3.connectionpool:https://unstructured.io:443 "GET /?version=0.16.17&platform=Darwin&python3.11&arch=arm64&gpu=False&dev=false HTTP/1.1" 200 232869
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): packages2.unstructured.io:443
DEBUG:urllib3.connectionpool:https://packages2.unstructured.io:443 "GET /python-telemetry?version=0.16.17&platform=Darwin&python3.11&arch=arm64&gpu=False&dev=false HTTP/1.1" 200 599
DETAIL:unstructured.trace:Not narrative. Text exceeds cap ratio 0.5:

[T]Layanan Cloud Kubernetes[/T]
DETAIL:unst

[Document(metadata={'source': '../data/kubernetes.txt', 'topic': 'Layanan Cloud Kubernetes'}, page_content='Layanan Cloud Kubernetes\nJuraganKlod Kubernetes adalah platform manajemen kontainer yang memungkinkan pengelolaan aplikasi berbasis kontainer dengan efisien. Dengan infrastruktur yang kuat, termasuk node worker dengan prosesor Intel Xeon Silver 4210 dan RAM hingga 256 GB, JuraganKlod Kubernetes mendukung orkestrasi kontainer yang otomatis dan skalabilitas yang mudah. Fitur integrasi CI/CD memungkinkan pengembangan yang lebih cepat, sementara dukungan multi-cloud memberikan fleksibilitas dalam pengelolaan infrastruktur.\n\nPlatform ini juga dilengkapi dengan jaringan internal 10 Gbps untuk komunikasi antar node yang cepat, serta penyimpanan terdistribusi menggunakan Ceph atau GlusterFS. Dengan kemampuan auto-scaling, JuraganKlod Kubernetes secara otomatis menyesuaikan jumlah kontainer berdasarkan beban kerja, memastikan aplikasi Anda selalu tersedia dan responsif terhadap permint

## Vector Store

In [6]:
class VectorStoreDocuments:
    """Thin wrapper around a persistent Chroma collection.

    Call ``vector_store()`` once to open the collection before using any other
    method.
    """

    def __init__(self):
        # Set by vector_store(); None until then.
        self.vectorstore = None

    def vector_store(self) -> 'VectorStoreDocuments':
        """Open the Chroma collection and return ``self`` for chaining."""
        self.vectorstore = Chroma(
            collection_name=CHROMA_COLLECTION,
            embedding_function=LLMModel.embeddings(),
            persist_directory=CHROMA_PATH,
        )
        return self

    def _require_store(self):
        """Return the opened vector store or fail with a clear message."""
        if self.vectorstore is None:
            raise RuntimeError("Vector store is not initialised; call vector_store() first.")
        return self.vectorstore

    @staticmethod
    def _document_id(document) -> str:
        """Derive a stable id from a document's content and metadata."""
        payload = json.dumps(
            {"page_content": document.page_content, "metadata": document.metadata},
            sort_keys=True,
            default=str,
            ensure_ascii=False,
        )
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    def store_documents(self, documents: List[Document]) -> None:
        """Index ``documents``, replacing any previously stored identical chunk.

        ``overwrite=True`` is not an option Chroma's ``add_documents``
        understands, so it had no effect. Overwrite semantics are obtained by
        passing content-derived ids instead: re-adding the same chunk targets
        the same id rather than creating a duplicate under a new random id.
        Duplicates inside one batch are collapsed because a batch may not
        contain the same id twice. An empty list is a no-op.
        """
        if not documents:
            return

        unique_documents = {}
        for document in documents:
            unique_documents.setdefault(self._document_id(document), document)

        self._require_store().add_documents(
            documents=list(unique_documents.values()),
            ids=list(unique_documents.keys()),
        )

    def retriever(self) -> VectorStoreRetriever:
        """Return the store as a retriever with its default settings."""
        return self._require_store().as_retriever()

    def remove_collection(self):
        """Reset the collection via the store's ``reset_collection()``."""
        self._require_store().reset_collection()

    def search(self, query: str, k: int = 5) -> List[Document]:
        """Return the ``k`` documents most similar to ``query``."""
        return self._require_store().similarity_search(query=query, k=k)
    

## Save Document Chunk to VectorDB

In [7]:
# vector_store() returns the wrapper itself, so `vs` and `store` refer to the same object.
store = VectorStoreDocuments()
vs = store.vector_store()
# Reset the collection, then index the chunks produced above.
vs.remove_collection()
vs.store_documents(new_doc)

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
DEBUG:chromadb.config:Starting component System
DEBUG:chromadb.config:Starting component Posthog
DEBUG:chromadb.config:Starting component OpenTelemetryClient
DEBUG:chromadb.config:Starting component SqliteDB
DEBUG:chromadb.config:Starting component SimpleQuotaEnforcer
DEBUG:chromadb.config:Starting component SimpleRateLimitEnforcer
DEBUG:chromadb.config:Starting component LocalSegmentManager
DEBUG:chromadb.config:Starting component LocalExecutor
DEBUG:chromadb.config:Starting component SegmentAPI
DEBUG:chromadb.api.segment:Collection juragan_klod_collection already exists, returning existing collection.
DEBUG:httpcore.connection:connect_tcp.started host='127.0.0.1' port=11434 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x31

## Test Retriever

In [8]:
# Fetch the five nearest chunks for a sample question and print them for inspection.
dd = vs.search("Saya pengen tau produk kubernetes?", k=5)
pprint(dd)

DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Content-Type', b'application/json; charset=utf-8'), (b'Date', b'Wed, 29 Jan 2025 20:35:01 GMT'), (b'Transfer-Encoding', b'chunked')])
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
DEBUG:httpcore.http11:receive_response_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_body.complete
DEBUG:httpcore.http11:response_closed.started
DEBUG:httpcore.http11:response_closed.complete


[Document(id='ae1640e7-5f17-4fe8-bfb3-c727ed5793ce', metadata={'source': '../data/kubernetes.txt', 'topic': 'Layanan Cloud Kubernetes'}, page_content='Layanan Cloud Kubernetes\nJuraganKlod Kubernetes adalah platform manajemen kontainer yang memungkinkan pengelolaan aplikasi berbasis kontainer dengan efisien. Dengan infrastruktur yang kuat, termasuk node worker dengan prosesor Intel Xeon Silver 4210 dan RAM hingga 256 GB, JuraganKlod Kubernetes mendukung orkestrasi kontainer yang otomatis dan skalabilitas yang mudah. Fitur integrasi CI/CD memungkinkan pengembangan yang lebih cepat, sementara dukungan multi-cloud memberikan fleksibilitas dalam pengelolaan infrastruktur.\n\nPlatform ini juga dilengkapi dengan jaringan internal 10 Gbps untuk komunikasi antar node yang cepat, serta penyimpanan terdistribusi menggunakan Ceph atau GlusterFS. Dengan kemampuan auto-scaling, JuraganKlod Kubernetes secara otomatis menyesuaikan jumlah kontainer berdasarkan beban kerja, memastikan aplikasi Anda sel

## Describe API Client

In [9]:
def _json_or_text(response) -> Any:
    """Return the decoded JSON body, or the raw text when the body is not JSON."""
    try:
        return response.json()
    except ValueError:
        return response.text


def _json_or_error(response) -> (Any | dict[str, Any]):
    """Return the JSON body on success, or ``{"error": <body>}`` for status >= 400.

    Error bodies that are not JSON (for example an HTML 500 page) are reported
    as text instead of raising a decode error.
    """
    if response.status_code < 400:
        return response.json()
    return {"error": _json_or_text(response)}


def api_create_order(order_data: Any, timeout: float = 10.0) -> (Any | dict[str, Any]):
    """POST ``order_data`` to ``/orders``.

    Args:
        order_data: JSON-serialisable order payload.
        timeout: Seconds to wait for the API before ``requests`` raises a timeout.

    Returns:
        The decoded response, or ``{"error": ...}`` when the status is >= 400.
    """
    response = requests.post(f"{API_URL}/orders", json=order_data, timeout=timeout)
    return _json_or_error(response)


def api_get_list_product(timeout: float = 10.0) -> (Any | dict[str, Any]):
    """GET ``/products`` and return the decoded body regardless of status code."""
    response = requests.get(f"{API_URL}/products", timeout=timeout)
    return response.json()


def api_search_product(query: str, timeout: float = 10.0) -> (Any | dict[str, Any]):
    """GET ``/products/?query=...`` and return the decoded body regardless of status.

    The query is passed through ``params`` so spaces, ``&`` and ``#`` in the
    search text are URL-encoded instead of corrupting the request.
    """
    response = requests.get(f"{API_URL}/products/", params={"query": query}, timeout=timeout)
    return response.json()


def api_check_order(order_number: str, timeout: float = 10.0) -> (Any | dict[str, Any]):
    """GET ``/orders/<order_number>``.

    The order number is percent-encoded (including ``/``) because it is
    free-form text and must not be able to alter the request path.

    Returns:
        The decoded response, or ``{"error": ...}`` when the status is >= 400.
    """
    safe_order_number = quote(str(order_number), safe="")
    response = requests.get(f"{API_URL}/orders/{safe_order_number}", timeout=timeout)
    return _json_or_error(response)


def api_report_order(year: int | str, timeout: float = 10.0) -> (Any | dict[str, Any]):
    """GET ``/order-report?year=<year>``.

    Returns:
        The decoded response, or ``{"error": ...}`` when the status is >= 400.
    """
    response = requests.get(f"{API_URL}/order-report", params={"year": year}, timeout=timeout)
    return _json_or_error(response)

## Describe LLM Tools

In [10]:
@tool("get-list-product-tool")
def get_list_products():
    """ Daftar produk dan harga yang tersedia, jelaskan jika ada yang bertanya spesifik tentang harga produk atau layanan yang tersedia. """
    # The docstring is the tool description `@tool` exposes to the model, so it
    # is kept verbatim. The product list is returned exactly as the API gave it.
    return api_get_list_product()

# Argument schema for `get-product-tool`: a single free-text search keyword.
# The Field description is a runtime string (part of the tool schema) and is
# therefore left untranslated.
class GetProductRequest(BaseModel):
    search: str = Field(description="Kata kunci untuk mencari produk atau layanan, bisa berupa nama produk atau SKU, jika pengguna belum mengetahui SKU, maka tanyakan terlebih dahulu dan berikan daftar produk")

@tool("get-product-tool", args_schema=GetProductRequest)
def get_product(search):
    """ Mendapatkan informasi daftar harga dari layanan atau produk, hasil harus menampilkan SKU, Nama dan Harga, dalam hal ini produk bisa di sebut juga sebagai layanan"""
    # The docstring is the tool description exposed to the model; keep it verbatim.
    response = api_search_product(search)

    # The search endpoint's body is returned even on HTTP errors, so it may be
    # an error object rather than a product list; iterating that would crash.
    if isinstance(response, dict) and "error" in response:
        return f"Error: {response['error']}, try again"
    if not isinstance(response, list):
        return f"Error: unexpected response {response!r}, try again"

    # An empty result used to produce an empty string, which gives the model
    # nothing to answer with.
    if not response:
        return "Maaf, produk tidak ditemukan."

    # One line per product: SKU, name and price.
    return "\n".join(
        f"SKU: {prod['sku']}, Nama: {prod['name']}, Harga: {prod['price']}" for prod in response
    )

# Argument schema for `create-order-tool`: product SKU plus the customer's
# email and full name. The Field descriptions are runtime strings (part of the
# tool schema) and are therefore left untranslated.
class GetOrderRequest(BaseModel):
    sku: str = Field(description="SKU harus diisi, SKU adalah kode unik untuk produk, SKU bisa dilihat di daftar produk, jika pengguna belum mengetahui SKU, maka tanyakan terlebih dahulu dan berikan daftar produk, jika customer memberikan nama produk, maka lihat nama produk tersebut di daftar produk untuk mendapatkan SKU")
    email: str = Field(description="Email customer harus diisi, jika customer belum menyebutkan email, maka tanyakan terlebih dahulu")
    full_name: str = Field(description="Nama lengkap customer, jika customer belum menyebutkan nama, maka tanyakan terlebih dahulu")

@tool("create-order-tool", args_schema=GetOrderRequest)
def create_order(sku, email, full_name):
    """ Membuat order baru, hasil harus menampilkan nomor order dan link pembayaran, jika ada error maka tampilkan pesan terkait error tersebut, jika customer telah menyebutkan nama, maka gunakan nama tersebut untuk mengisi data order, jika pengguna belum menyebutkan email atau nama, maka tanya terlebih dahulu emailnya"""
    # The docstring is the tool description exposed to the model; keep it verbatim.
    order_payload = {"sku": sku, "email": email, "full_name": full_name}
    result = api_create_order(order_payload)

    # Success path first: report the order number and the payment link.
    if "error" not in result:
        return f"Order Number: {result['order_number']} created. Payment link: {result['invoice_url']}"

    return f"Error: {result['error']}, try again"

# Argument schema for `check-order-tool`: the order number to look up.
# The Field description is a runtime string and is left untranslated.
class GetCheckOrderRequest(BaseModel):
    order_number: str = Field(description="Nomor order harus diisi, nomor order bisa dilihat di invoice yang diberikan saat pembuatan order")

@tool("check-order-tool", args_schema=GetCheckOrderRequest)
def check_order(order_number):
    """ Memeriksa order berdasarkan nomor order, cek berdasarkan payment_status, jika order telah selesai maka tampilkan status selesai, jika order belum selesai maka tampilkan status pending, jika order tidak ditemukan maka tampilkan pesan order tidak ditemukan, Jika customer sudah merasa membayar namun status masih pending, maka berikan nomor telepon JuraganKlod untuk konfirmasi pembayaran"""
    # The docstring is the tool description exposed to the model; keep it verbatim.
    result = api_check_order(order_number)
    if "error" in result:
        return f"Error: {result['error']}, try again"

    status = result['payment_status']
    # Only the literal "pending" status prompts for payment; anything else is
    # treated as paid.
    closing = "Please complete payment soon." if status == "pending" else "Thank you for your payment."
    return f"Status: {status}. {closing}"

# Argument schema for `order-report-tool`: the report year as an integer.
# The Field description is a runtime string and is left untranslated.
class OrderReportRequest(BaseModel):
    year: int = Field(description="Tahun laporan order harus diisi, laporan order berdasarkan tahun")

@tool("order-report-tool", args_schema=OrderReportRequest)
def order_report(year: int):
    """Membuat grafik laporan order per tahun, hasil berisi grafik bar chart yang menampilkan jumlah order per bulan, hasilnya adalah markdown image"""
    # The docstring is the tool description exposed to the model; keep it verbatim.
    # NOTE(review): the description promises a markdown image, but a bare URL is
    # returned — confirm which one consumers expect before changing either.
    response = api_report_order(year)
    if "error" in response:
        return f"Error: {response['error']}, try again"

    months = response['months']
    order_counts = response['order_counts']

    # One blue bar per month.
    fig = go.Figure(data=[
        go.Bar(x=months, y=order_counts, marker_color='blue')
    ])
    fig.update_layout(
        title=f'Order Report for {year}',
        xaxis_title='Month',
        yaxis_title='Number of Orders',
        xaxis=dict(tickmode='linear'),
        template='plotly_white'
    )

    # Writing the image fails if the target directory is missing, so create it
    # first (relative to the current working directory, as before).
    report_dir = Path("storages/reports")
    report_dir.mkdir(parents=True, exist_ok=True)

    report_filename = f'order_report_{year}.png'
    fig.write_image(str(report_dir / report_filename))

    # URL under which the API is expected to serve the generated file.
    return f"{API_URL}/storages/reports/{report_filename}"

# Argument schema for `get-content-tool`: the free-text query to look up.
# The Field description is a runtime string and is left untranslated.
class GetContentInformation(BaseModel):
    query: str = Field(description="Query yang ingin dicari, bisa berupa nama layanan, produk, tentang perusahaan atau informasi lainnya")

@tool("get-content-tool", args_schema=GetContentInformation)
def get_content(query: str):
    """Mendapatkan informasi dari konten yang tersedia, berupa layanan, produk, atau informasi lainnya, gunakan tool ini jika jawaban tidak ditemukan dari tool lainnya"""
    # The docstring is the tool description exposed to the model; keep it verbatim.
    # Uses the notebook-level `vs` vector store; only the top two hits are used.
    hits = vs.search(query, k=2)
    if not hits:
        return "Maaf, tidak ada informasi yang ditemukan terkait."

    # Trim each hit and drop those that are empty after trimming, then join
    # the remainder with a blank line between chunks.
    trimmed = (hit.page_content.strip() for hit in hits)
    return "\n\n".join(text for text in trimmed if text)

def run_tool():
    """Return the list of tools made available to the agent."""
    return [
        get_content,
        get_list_products,
        get_product,
        create_order,
        check_order,
        order_report,
    ]

In [11]:
# Redis connection for the response cache. Credentials must never be committed
# in a notebook: the password is read from the REDIS_PASSWORD environment
# variable (KeyError if unset, so a misconfiguration fails here rather than on
# the first Redis command). Host, port and username can be overridden the same
# way and otherwise keep their previous values.
# SECURITY: the password that used to be hardcoded here has been exposed and
# should be rotated.
redis_client = redis.StrictRedis(
    host=os.environ.get("REDIS_HOST", 'redis-12033.c334.asia-southeast2-1.gce.redns.redis-cloud.com'),
    port=int(os.environ.get("REDIS_PORT", "12033")),
    username=os.environ.get("REDIS_USERNAME", 'default'),
    password=os.environ["REDIS_PASSWORD"],
)

# IndoBERT tokenizer and encoder, loaded from the Hugging Face model id below.
tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
model = BertModel.from_pretrained("indobenchmark/indobert-base-p2")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://us.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/model.safetensors HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443


## Describe LLM Invocation

In [24]:
class LLMInvocation:
    """Runs the tool-calling agent with per-session chat history and a Redis answer cache.

    Everything is class-level: ``store`` maps session ids to in-memory chat
    histories, ``config`` holds the runnable config of the most recent agent
    call, and the Redis client / IndoBERT model are shared by all calls.

    Cache entries are JSON objects ``{"input", "output", "must_cache"}`` stored
    under ``cache:<md5>`` for one hour. Despite its name, ``must_cache=True``
    marks an answer that must NOT be reused: it is set when the agent called
    any tool other than ``get-content-tool``.
    """

    # The password comes from the REDIS_PASSWORD environment variable (KeyError
    # if unset) instead of being committed in the notebook; host, port and
    # username may be overridden the same way and otherwise keep their previous
    # values.
    # SECURITY: the password that used to be hardcoded here has been exposed
    # and should be rotated.
    redis_client = redis.StrictRedis(
        host=os.environ.get("REDIS_HOST", 'redis-12033.c334.asia-southeast2-1.gce.redns.redis-cloud.com'),
        port=int(os.environ.get("REDIS_PORT", "12033")),
        username=os.environ.get("REDIS_USERNAME", 'default'),
        password=os.environ["REDIS_PASSWORD"],
    )
    # NOTE(review): the preceding cell already loads the same tokenizer/model at
    # module level; consider sharing a single instance.
    tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
    model = BertModel.from_pretrained("indobenchmark/indobert-base-p2")

    # session_id -> ChatMessageHistory
    store = {}
    # Runnable config used for the most recent agent invocation.
    config = {}

    @staticmethod
    def generate_session_id() -> str:
        """Return a new random session id (UUID4 string)."""
        return str(uuid.uuid4())

    @staticmethod
    def get_session_history(session_id: str) -> ChatMessageHistory:
        """Return the chat history for ``session_id``, creating it on first use."""
        if session_id not in LLMInvocation.store:
            LLMInvocation.store[session_id] = ChatMessageHistory()
        return LLMInvocation.store[session_id]

    @staticmethod
    def add_message_to_session(session_id: str, message) -> None:
        """Append ``message`` to the session's history, creating the history if needed."""
        LLMInvocation.get_session_history(session_id).add_message(message)

    @staticmethod
    def clear_session_history(session_id: str) -> None:
        """Forget the history of ``session_id``; unknown ids are ignored."""
        LLMInvocation.store.pop(session_id, None)

    @staticmethod
    def get_current_session_id(agent_scratchpad: dict) -> str | None:
        """Return the ``"session_id"`` entry of the given mapping, or ``None`` if absent."""
        return agent_scratchpad.get("session_id")

    @staticmethod
    def normalize_question_indoBERT(question: str) -> np.ndarray:
        """Embed ``question`` with IndoBERT.

        The input is truncated to 128 tokens. The hidden state of the first
        token is taken as the sentence embedding and rounded to 4 decimals.
        """
        inputs = LLMInvocation.tokenizer(question, return_tensors="pt", truncation=True, padding=True, max_length=128)
        with torch.no_grad():
            outputs = LLMInvocation.model(**inputs)

        sentence_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
        return np.round(sentence_embedding, 4)

    @staticmethod
    def _cache_key_from_embedding(session_id: str, embedding: np.ndarray) -> str:
        """Build the ``cache:<md5>`` key from a session id and a question embedding."""
        embedding_str = str(embedding.tolist())
        digest = hashlib.md5(f"{session_id}-{embedding_str}".encode()).hexdigest()
        return f"cache:{digest}"

    @staticmethod
    def generate_cache_key(session_id: str, question: str) -> str:
        """Return the cache key for ``question`` within ``session_id``."""
        embedding = LLMInvocation.normalize_question_indoBERT(question)
        return LLMInvocation._cache_key_from_embedding(session_id, embedding)

    @staticmethod
    def clear_all_cache():
        """Delete every ``cache:*`` entry.

        Only keys written by this class are removed. The previous ``FLUSHALL``
        erased every key in every database of the Redis server, including data
        belonging to anything else that shares it.
        """
        client = LLMInvocation.redis_client
        for key in client.scan_iter(match="cache:*"):
            client.delete(key)

    @staticmethod
    def _embedding_similarity(embedding1, embedding2) -> float:
        """Return the cosine similarity of two embedding vectors."""
        return 1 - cosine(embedding1, embedding2)

    @staticmethod
    def compare_similarity(question1: str, question2: str) -> bool:
        """Return True when the two questions' cosine similarity exceeds ``QUESTION_THRESHOLD``.

        The similarity value is printed for debugging.
        """
        similarity = LLMInvocation._embedding_similarity(
            LLMInvocation.normalize_question_indoBERT(question1),
            LLMInvocation.normalize_question_indoBERT(question2),
        )
        print(f"Similarity: {similarity}")

        return bool(similarity > QUESTION_THRESHOLD)

    @staticmethod
    def create_agent(tools):
        """Build the tool-calling agent wrapped with per-session message history.

        The system prompt is a runtime string sent to the model and is kept in
        Indonesian on purpose.
        """
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", (
                    "Kamu adalah asisten layanan server, nama kamu adalah BotJuraganKlod. "
                    "Gunakan konteks yang diberikan untuk menjawab pertanyaan. "
                    "Jangan memberikan jawaban yang tidak berkaitan dengan konteks. "
                    "Hasil tidak boleh ambigu, dan jawaban harus singkat. "
                    "Jika terdapat kata kunci layanan itu maksudnya adalah produk. "
                    "Jika ada pertanyaan mengenai ini apa, atau seperti kebingungan tentang apa, berikan konteks tentang JuraganKlod. "
                    "Jika ada pertanyaan mengenai cara order, berikan jawaban dengan cara mengirimkan nama lengkap, email dan SKU produk atau Nama Produk. "
                    "Jangan menjawab tidak tahu, atau tidak mendapatkan referensi, coba proses jawaban dengan tool lainnya yang relevan. "
                    "Gunakan tool 'get-content-tool' jika kamu membutuhkan informasi lebih lanjut dan tanpa merubah query input. "
                )),
                ("placeholder", "{chat_history}"),
                ("human", "{input}"),
                ("placeholder", "{agent_scratchpad}"),
            ]
        )

        agent = create_tool_calling_agent(llm=LLMModel.llm(), tools=tools, prompt=prompt)
        agent_executor = AgentExecutor(
            name="BotJuraganKlod",
            agent=agent,
            tools=tools,
            # Needed by invoke_with_cache() to see which tools were called.
            return_intermediate_steps=True,
        )

        agent_with_history = RunnableWithMessageHistory(
            agent_executor,
            LLMInvocation.get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="output",
            stream_runnable=False,
        )

        return agent_with_history

    @staticmethod
    def invoke(question: str, session_id: str) -> str:
        """Ask the agent ``question`` within ``session_id`` without using the cache.

        Returns:
            The agent's ``"output"`` text, or ``""`` if that key is absent.
        """
        agent_with_history = LLMInvocation.create_agent(run_tool())

        LLMInvocation.config = {
            "configurable": {
                "session_id": session_id
            }
        }

        output = agent_with_history.invoke(
            {
                "input": question,
                "chat_history": LLMInvocation.get_session_history(session_id).messages
            },
            LLMInvocation.config
        )

        return output.get("output", "")

    @staticmethod
    def _remember_exchange(session_id: str, question: str, answer: str) -> str:
        """Record a cache-served question/answer pair in the session history and return the answer."""
        memory = LLMInvocation.get_session_history(session_id)
        memory.add_user_message(question)
        memory.add_ai_message(answer)
        return answer

    @staticmethod
    def _used_non_content_tool(intermediate_steps) -> bool:
        """Return True if any agent step called a tool other than ``get-content-tool``."""
        return any(
            hasattr(step[0], "tool") and "get-content-tool" not in step[0].tool
            for step in intermediate_steps
        )

    @staticmethod
    def invoke_with_cache(question: str, session_id: str) -> str:
        """Answer ``question``, serving from the Redis cache when possible.

        Lookup order:
          1. the exact key for this session and question;
          2. any cached question whose similarity exceeds ``QUESTION_THRESHOLD``;
          3. otherwise run the agent and cache its answer for one hour.

        Entries flagged ``must_cache`` are skipped in steps 1 and 2. Answers
        served from the cache are appended to the session history.

        NOTE(review): step 2 scans every ``cache:*`` key, so it can return an
        answer cached by a different session even though the exact key is
        per-session — confirm this is intended.

        Returns:
            The answer text, or a fixed error message if the agent output is
            not a dict.
        """
        client = LLMInvocation.redis_client

        # Embed the question once; it is reused for the key and every comparison.
        question_embedding = LLMInvocation.normalize_question_indoBERT(question)
        cache_key = LLMInvocation._cache_key_from_embedding(session_id, question_embedding)

        cached_response = client.get(cache_key)
        if cached_response:
            cached_data = json.loads(cached_response)

            if cached_data.get("must_cache"):
                print("🔄 Cache ditemukan tetapi diabaikan karena menggunakan tools")
            else:
                print("🔥 Menggunakan cache")
                return LLMInvocation._remember_exchange(session_id, question, cached_data["output"])

        # SCAN iterates incrementally instead of blocking the server like KEYS.
        for stored_key in client.scan_iter(match="cache:*"):
            cached_question_data = client.get(stored_key)
            if not cached_question_data:
                # The entry expired between the scan and the read.
                continue

            cached_json = json.loads(cached_question_data)
            before_question = cached_json.get("input")
            if not before_question:
                continue

            similarity = LLMInvocation._embedding_similarity(
                question_embedding,
                LLMInvocation.normalize_question_indoBERT(before_question),
            )
            print(f"Similarity: {similarity}")
            # Written as a negated ">" so a NaN similarity is treated as "not similar".
            if not (similarity > QUESTION_THRESHOLD):
                continue

            if cached_json.get("must_cache"):
                print("🔄 Cache pertanyaan mirip ditemukan tetapi diabaikan karena menggunakan tools")
                continue

            print("🔥 Pertanyaan mirip, menggunakan cache")
            print("🔥 Pertanyaan sebelumnya:", before_question)
            print("🔥 Pertanyaan saat ini:", question)
            return LLMInvocation._remember_exchange(session_id, question, cached_json["output"])

        # Cache miss: only now build the agent, so hits stay cheap.
        agent_with_history = LLMInvocation.create_agent(run_tool())
        LLMInvocation.config = {"configurable": {"session_id": session_id}}
        output = agent_with_history.invoke({"input": question}, LLMInvocation.config)

        if not isinstance(output, dict):
            print("Output tidak dapat diserialisasi:", output)
            return "Terjadi kesalahan dalam proses."

        output_data = output.get("output", "")
        must_cache = LLMInvocation._used_non_content_tool(output.get("intermediate_steps", []))

        client.set(cache_key, json.dumps({
            "input": question,
            "output": output_data,
            "must_cache": must_cache
        }), ex=3600)

        return output_data

DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/main/model.safetensors HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/indobenchmark/indobert-base-p2 HTTP/1.1" 200 1631


DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/indobenchmark/indobert-base-p2/commits/main HTTP/1.1" 200 3011
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/indobenchmark/indobert-base-p2/discussions?p=0 HTTP/1.1" 200 1262
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/indobenchmark/indobert-base-p2/commits/refs%2Fpr%2F2 HTTP/1.1" 200 3976
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/refs%2Fpr%2F2/model.safetensors.index.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /indobenchmark/indobert-base-p2/resolve/refs%2Fpr%2F2/model.safetensors HTTP/1.1" 302 0


## Test LLM Invoke

In [21]:
# Create one session id that the test cells below reuse.
session_id = LLMInvocation.generate_session_id()

In [22]:
# Clear the cache before testing so earlier runs cannot answer the question below.
LLMInvocation.clear_all_cache()

In [28]:
# Send one question through the cached path; the commented-out line is an alternative prompt.
question = "Sumpah Kamu keren banget lhoo?"
# question = "Siapa CEO dari JuraganKlod? Apa saja produk yang ditawarkan oleh JuraganKlod, sertakan juga untuk harganya?"
output = LLMInvocation.invoke_with_cache(question, session_id=session_id)

# pprint(output)
print(output)
# display(Markdown(output["output"]))

🔥 Menggunakan cache
Sumpah saya sebagai asisten layanan JuraganKlod adalah memberikan informasi yang akurat dan membantu pelanggan mencapai tujuan mereka dengan optimalisasi performa, keamanan, skalabilitas, fleksibilitas, dan efisiensi biaya. Apa lagi yang bisa saya bantu?


## Show History Chat

In [29]:
# Show the session id of the most recent agent call, then that session's chat history.
# NOTE(review): LLMInvocation.config starts as {} and is only assigned when the
# agent actually runs, so this lookup raises KeyError if every call so far was
# answered from the cache.
print ("Current Session", LLMInvocation.get_current_session_id(LLMInvocation.config["configurable"]))
print ("=====================")
print (LLMInvocation.get_session_history(session_id))

Current Session 7a0888c8-c8b0-4b36-aa0d-e7a89d8a2328
Human: Sumpah Kamu keren banget lhoo?
AI: Haha, makasih ya! Sumpah saya selalu siap membantu dan memberikan informasi yang akurat tentang JuraganKlod. Apa lagi yang bisa saya bantu?
Human: Sumpah Kamu keren banget lhoo?
AI: Sumpah saya sebagai asisten layanan JuraganKlod adalah memberikan informasi yang akurat dan membantu pelanggan mencapai tujuan mereka dengan optimalisasi performa, keamanan, skalabilitas, fleksibilitas, dan efisiensi biaya. Apa lagi yang bisa saya bantu?
Human: Sumpah Kamu keren banget lhoo?
AI: Sumpah saya sebagai asisten layanan JuraganKlod adalah memberikan informasi yang akurat dan membantu pelanggan mencapai tujuan mereka dengan optimalisasi performa, keamanan, skalabilitas, fleksibilitas, dan efisiensi biaya. Apa lagi yang bisa saya bantu?
Human: Sumpah Kamu keren banget lhoo?
AI: Sumpah saya sebagai asisten layanan JuraganKlod adalah memberikan informasi yang akurat dan membantu pelanggan mencapai tujuan me