# Evaluate with IndoBERT

In [1]:
import sys
import os

sys.path.append(os.path.abspath("../"))

from transformers import BertTokenizer, BertModel
from sentence_transformers import SentenceTransformer
import torch
import numpy as np

from psycopg_pool import AsyncConnectionPool
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from app.retrieval.vector_store import vector_store_service
from app.core.database import session_manager, pgvector_session_manager
from app.env import DATABASE_URL
from app.retrieval.chain import chain_service

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
async def setup_environment():
    """Initialise the application services used by this notebook.

    Steps, in order:
      1. initialise ``session_manager``;
      2. load the embedding model and the pgvector store on ``vector_store_service``;
      3. initialise ``pgvector_session_manager``;
      4. open an async Postgres connection pool and use it to build the
         ``AsyncPostgresSaver`` checkpointer that is registered on ``chain_service``.

    Returns:
        The opened ``AsyncConnectionPool``. The caller owns it and should
        ``await pool.close()`` when the notebook is finished.

    Raises:
        Whatever the checkpointer setup raises; the pool is closed before the
        exception propagates so no connections are left dangling.
    """
    await session_manager.initialize()
    vector_store_service.initialize_embedding_model(
        '../data/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    )
    vector_store_service.initialize_pg_vector()
    await pgvector_session_manager.initialize()

    DB_URI = f"postgresql://{DATABASE_URL}?sslmode=disable"
    # Options forwarded to every connection the pool creates.
    connection_kwargs = {
        "autocommit": True,
        "prepare_threshold": 0,
    }

    # Create the pool closed and open it explicitly: opening an async pool from
    # the constructor is deprecated, and open() replaces the manual __aenter__().
    pool = AsyncConnectionPool(
        conninfo=DB_URI,
        max_size=20,
        kwargs=connection_kwargs,
        open=False,
    )
    await pool.open()

    try:
        checkpointer = AsyncPostgresSaver(pool)
        await checkpointer.setup()
        chain_service.set_checkpointer(checkpointer)
    except BaseException:
        # Do not leak pooled connections when the checkpointer cannot be set up.
        await pool.close()
        raise

    return pool  # Keep reference to close later

# Run setup once in the notebook (top-level await relies on the notebook's
# async cell support). The returned pool is kept in a module-level name so it
# can be closed later.
pool = await setup_environment()

Initialize embedding model...
../data/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Successfully initialize embedding model


In [7]:
# Local directory holding the IndoBERT checkpoint
model_name = '../data/indobenchmark/indobert-base-p1'
# Tokenizer and BERT encoder are both loaded from that same directory
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

In [11]:
from sentence_transformers import SentenceTransformer, models

In [12]:
# Wrap the local IndoBERT checkpoint as a sentence-transformers Transformer module
word_embedding_model = models.Transformer(
    model_name_or_path='../data/indobenchmark/indobert-base-p1'
)

# Pooling module sized to the transformer's word-embedding dimension;
# only the mean-tokens mode is switched on
pooling_model = models.Pooling(
    word_embedding_dimension=word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_cls_token=False,
    pooling_mode_max_tokens=False,
    pooling_mode_mean_tokens=True,
)

# Chain transformer + pooling into a single SentenceTransformer
sbert_model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

In [13]:
def get_embeddings(text: str):
    """Encode ``text`` with the module-level ``sbert_model`` and return the result.

    Args:
        text: The text to embed.

    Returns:
        Whatever ``sbert_model.encode`` returns for ``text``.
    """
    embedding = sbert_model.encode(text)
    return embedding

def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity between two embedding vectors.

    Args:
        embedding1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        embedding2: Second vector, same length as ``embedding1``.

    Returns:
        ``dot(e1, e2) / (|e1| * |e2|)``. When either vector has zero norm the
        similarity is undefined; ``0.0`` is returned instead of dividing by
        zero (which would yield NaN and a RuntimeWarning).
    """
    denominator = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
    if denominator == 0:
        # A zero vector has no direction, so treat it as "not similar".
        return 0.0
    return np.dot(embedding1, embedding2) / denominator

def evaluate_responses(
    user_input: str,
    generated_response: str,
    reference_response: str,
    threshold: float = 0.7,
):
    """Print embedding-based similarity metrics for a generated answer.

    Three lines are printed:
      * the cosine similarity between the generated and the reference response;
      * whether that similarity is strictly greater than ``threshold``;
      * the cosine similarity between the user input and the generated response.

    Args:
        user_input: The question that was sent to the agent.
        generated_response: The answer to evaluate.
        reference_response: The expected answer to compare against.
        threshold: Similarity above which the answer is reported as relevant.
            Defaults to 0.7, the value that was previously hard-coded.

    Returns:
        None. Results are only printed.
    """
    # Embed the generated answer and the reference answer
    generated_embedding = get_embeddings(generated_response)
    reference_embedding = get_embeddings(reference_response)

    # Semantic similarity between answer and reference
    similarity = cosine_similarity(generated_embedding, reference_embedding)

    # Relevance is a simple cut-off on that similarity
    is_relevant = similarity > threshold

    # Also compare the answer with the question itself
    user_input_embedding = get_embeddings(user_input)
    input_similarity = cosine_similarity(user_input_embedding, generated_embedding)

    print(f"Semantic Similarity: {similarity:.4f}")
    print(f"Relevance (threshold {threshold}): {is_relevant}")
    print(f"Input-Response Similarity: {input_similarity:.4f}")

In [14]:
from langchain.schema import HumanMessage, AIMessage, SystemMessage

In [21]:
user_input = "Siapa saja dosen yang ada di departemen teknologi informasi?"

agent = chain_service.create_agent("openai")

result = await agent.ainvoke(
    {
        "messages": [
            SystemMessage(content=f"User ID atau sender pesan adalah: user-test"), 
            HumanMessage(content=user_input)
        ],
    }, 
    {"configurable": {"thread_id": f"test-indo"}}
)

messages = result["messages"]
ai_messages = [
        message.content
        for message in messages
        if isinstance(message, AIMessage) and message.content.strip() != ""
    ]

generated_response = (
        ai_messages[-1]
        if ai_messages
        else "Terjadi kesalahan, tidak ada respon dari AI. Tolong hubungi developer."
    )

collection_name
collection_name
collection_name
administration
test sampe sini


In [22]:
# Reference answer that the generated response is scored against by
# evaluate_responses(). The whole string (including indentation and bullet
# markers) is what gets embedded.
# NOTE(review): the entry "Irzal Ahmad Sabilla, S. Kom.,M.Kom" appears twice
# below - confirm whether the duplicate is intentional, since it is part of
# the text that is embedded.
# generated_response = "Saya adalah CATI, asisten virtual yang dirancang untuk membantu Anda."
reference_response = """
    *   Dr.tech.Ir. Raden Venantius Hari Ginardi, M.Sc (Kepala Departemen)
    *   Ir. Muchammad Husni, M.Kom
    *   Dr. Ir. Henning Titi Ciptaningtyas, S.Kom, M.Kom.
    *   Ridho Rahman Hariadi, S.Kom., M.Sc.
    *   Hatma Suryotrisongko, S.Kom., M.Eng., Ph.D.
    *   Annisaa Sri Indrawanti, S. Kom., M. Kom
    *   Dr. Rizka Wakhidatus Sholikah, S. Kom
    *   Irzal Ahmad Sabilla, S. Kom.,M.Kom
    *   Irzal Ahmad Sabilla, S. Kom.,M.Kom
    *   Fuad Dary Rosyadi, S.Kom., M.Kom. 
    *   Hafara Firdausi, S.Kom., M.Kom. 
    """

In [23]:
# Show the question, the agent's answer and the reference answer, one per print line
print(user_input, generated_response, reference_response, sep="\n")

Siapa saja dosen yang ada di departemen teknologi informasi?
Berikut adalah beberapa dosen yang ada di Departemen Teknologi Informasi di Institut Teknologi Sepuluh Nopember:

1. **Dr.tech.Ir. Raden Venantius Hari Ginardi, M.Sc**
   - Jabatan: Kepala Departemen Teknologi Informasi
   - Email: hari@its.ac.id

2. **Ir. Muchammad Husni, M.Kom**
   - Jabatan: Dosen

3. **Dr. Ir. Henning Titi Ciptaningtyas, S.Kom, M.Kom.**
   - Jabatan: Dosen
   - Email: henning@its.ac.id

4. **Ridho Rahman Hariadi, S.Kom., M.Sc.**
   - Jabatan: Dosen
   - Email: ridho@if.its.ac.id

5. **Dr. Rizka Wakhidatus Sholikah, S. Kom**
   - Jabatan: Dosen
   - Email: wakhidatus@its.ac.id

6. **Irzal Ahmad Sabilla, S. Kom., M.Kom**
   - Jabatan: Dosen
   - Email: irzal.ahmad.s@gmail.com

Jika Anda memerlukan informasi lebih lanjut mengenai dosen lainnya, silakan tanyakan kembali.

Apakah ada feedback atau saran yang ingin Anda bagikan mengenai chatbot ini?

    *   Dr.tech.Ir. Raden Venantius Hari Ginardi, M.Sc (Kepal

In [24]:
# Score the agent's answer against the reference answer and against the
# original question; the three metrics are printed by the function.
evaluate_responses(user_input, generated_response, reference_response)

Semantic Similarity: 0.7833
Relevance (threshold 0.7): True
Input-Response Similarity: 0.3711
